TahaRasouli committed on
Commit
e31495c
·
verified ·
1 Parent(s): 2d540cf

Update evaluator.py

Browse files
Files changed (1) hide show
  1. evaluator.py +778 -378
evaluator.py CHANGED
@@ -1,511 +1,911 @@
1
- import os
2
- import base64
3
- from groq import Groq
4
- import streamlit as st
5
 
6
- class IELTSTask2Evaluator:
7
- def __init__(self):
8
- self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
9
- self.model = "openai/gpt-oss-20b"
10
 
11
- def evaluate_essay(self, essay_question, essay_content):
12
- score_prompt = """ Provide the scores for the following IELTS task 2 essay in terms of Cohesion & Coherence, Task Achievement, Lexical Resouces and Grammatical Range & Accuracy critera. The scores should be in IELTS framework. Each criterion should be scored between 1 to 9 and should be integer. E.g. 5.5 is not acceptable! For each criteria that you score also provide 3 different comments using the following band descriptors and by referncing the essay.
13
 
14
- IELTS Scoring band descriptor:
15
- Task Achievement: Score 9: • fully addresses all parts of the task
16
- • presents a fully developed position in answer to the question with relevant, fully extended and well supported ideas
17
 
18
- 8: • sufficiently addresses all parts of the task
19
- • presents a well-developed response to the question with relevant, extended and supported ideas
20
 
21
- 7: • addresses all parts of the task
22
- • presents a clear position throughout the response
23
- • presents, extends and supports main ideas, but there may be a tendency to over-generalise and/or supporting ideas may lack focus
24
 
25
- 6: • addresses all parts of the task although some parts may be more fully covered than others
26
- • presents a relevant position although the conclusions may become unclear or repetitive
27
- • presents relevant main ideas but some may be inadequately developed/unclear
28
 
29
- 5: • addresses the task only partially; the format may be inappropriate in places
30
- • expresses a position but the development is not always clear and there may be no conclusions drawn
31
- • presents some main ideas but these are limited and not sufficiently developed; there may be irrelevant detail
32
 
33
- 4: • responds to the task only in a minimal way or the answer is tangential; the format may be inappropriate
34
- • presents a position but this is unclear
35
- • presents some main ideas but these are difficult to identify and may be repetitive, irrelevant or not well supported
36
 
37
- 3: • does not adequately address any part of the task
38
- • does not express a clear position
39
- • presents few ideas, which are largely undeveloped or irrelevant
40
 
41
- 2: • barely responds to the task
42
- • does not express a position
43
- • may attempt to present one or two ideas but there is no development
44
- 1: • answer is completely unrelated to the task
45
 
46
- Cohesion and Coherence: Score 9: • uses cohesion in such a way that it attracts no attention
47
- • skilfully manages paragraphing
48
 
49
 
50
- Score 8: • sequences information and ideas logically
51
- • manages all aspects of cohesion well
52
- • uses paragraphing sufficiently and appropriately
53
 
54
- Score 7: • logically organises information and ideas; there is clear progression throughout
55
- • uses a range of cohesive devices appropriately although there may be some under-/over-use
56
- • presents a clear central topic within each paragraph
57
 
58
- Score 6: • arranges information and ideas coherently and there is a clear overall progression
59
- • uses cohesive devices effectively, but cohesion within and/or between sentences may be faulty or mechanical
60
- • may not always use referencing clearly or appropriately
61
- • uses paragraphing, but not always logically
62
 
63
- Score 5: • presents information with some organisation but there may be a lack of overall progression
64
- • makes inadequate, inaccurate or over-use of cohesive devices
65
- • may be repetitive because of lack of referencing and substitution
66
- • may not write in paragraphs, or paragraphing may be inadequate
67
 
68
- Score 4: • presents information and ideas but these are not arranged coherently and there is no clear progression in the response
69
- • uses some basic cohesive devices but these may be inaccurate or repetitive
70
- • may not write in paragraphs or their use may be confusing
71
 
72
- Score 3: • does not organise ideas logically
73
- • may use a very limited range of cohesive devices, and those used may not indicate a logical relationship between ideas
74
 
75
- Score 2: • has very little control of organisational features
76
 
77
- Score 1: • fails to communicate any message
78
 
79
 
80
- Lexical Resouce: Score 9: • uses a wide range of vocabulary with very natural and sophisticated control of lexical features; rare minor errors occur only as 'slips'
81
 
82
 
83
- Score 8: • uses a wide range of vocabulary fluently and flexibly to convey precise meanings
84
- • skilfully uses uncommon lexical items but there may be occasional inaccuracies in word choice and collocation
85
- • produces rare errors in spelling and/or word formation
86
 
87
 
88
 
89
- Score 7: • uses a sufficient range of vocabulary to allow some flexibility and precision
90
- • uses less common lexical items with some awareness of style and collocation
91
- • may produce occasional errors in word choice, spelling and/or word formation
92
 
93
 
94
- Score 6: • uses an adequate range of vocabulary for the task
95
- • attempts to use less common vocabulary but with some inaccuracy
96
- • makes some errors in spelling and/or word formation, but they do not impede communication
97
 
98
 
99
- Score 5: • uses a limited range of vocabulary, but this is minimally adequate for the task
100
- • may make noticeable errors in spelling and/or word formation that may cause some difficulty for the reader
101
 
102
 
103
- Score 4: • uses only basic vocabulary which may be used repetitively or which may be inappropriate for the task
104
- • has limited control of word formation and/or spelling; errors may cause strain for the reader
105
 
106
 
107
- Score 3: • uses only a very limited range of words and expressions with very limited control of word formation and/or spelling
108
- • errors may severely distort the message
109
 
110
 
111
- Score 2: • uses an extremely limited range of vocabulary; essentially no control of word formation and/or spelling
112
 
113
 
114
- Score 1: • can only use a few isolated words
115
 
116
 
117
 
118
 
119
 
120
- Grammatical Range and Accuracy: Score 9: • uses a wide range of structures with full flexibility and accuracy; rare minor errors occur only as 'slips'
121
 
122
 
123
- Score 8: • uses a wide range of structures
124
- • the majority of sentences are error-free
125
- • makes only very occasional errors or inappropriacies
126
 
127
 
128
- Score 7: • uses a variety of complex structures
129
- • produces frequent error-free sentences
130
- • has good control of grammar and punctuation but may make a few errors
131
 
132
 
133
- Score 6: • uses a mix of simple and complex sentence forms
134
- • makes some errors in grammar and punctuation but they rarely reduce communication
135
 
136
 
137
- Score 5: • uses only a limited range of structures
138
- • attempts complex sentences but these tend to be less accurate than simple sentences
139
- • may make frequent grammatical errors and punctuation may be faulty; errors can cause some difficulty for the reader
140
 
141
 
142
- Score 4: • uses only a very limited range of structures with only rare use of subordinate clauses
143
- • some structures are accurate but errors predominate, and punctuation is often faulty
144
 
145
 
146
- Score 3: • attempts sentence forms but errors in grammar and punctuation predominate and distort the meaning
147
 
148
 
149
- Score 2: • cannot use sentence forms except in memorised phrases
150
 
151
 
152
- Score 1: • cannot use sentence forms at all
153
 
154
- Essay Question: {}
155
- Essay: {}
156
- """
157
- return self._get_completion(score_prompt.format(essay_question, essay_content))
158
 
159
- def analyze_grammar(self, essay_content):
160
- gram_prompt = """ Provide a grammatical analysis of the following essay with respect to the content a B1 leanrner must know. Provide all the grammars used and their accuracy pecentage. E.g. "Simple Present: 70%". Also provide comments for inaccurate structures. Don't give me any exercises.
161
 
162
- Essay: {}
163
- """
164
- return self._get_completion(gram_prompt.format(essay_content))
165
 
166
- def analyze_vocabulary(self, essay_content):
167
- vocab_prompt = """ Provide the lexical errors and provide their correct forms. For each of the vocabulary related to the context of the essay that you think could be improved, give suggesions. Provide full sentence examples for these suggestions.
168
 
169
- Essay: {}
170
- """
171
- return self._get_completion(vocab_prompt.format(essay_content))
172
 
173
- def analyze_cohesion_coherence(self, essay_content):
174
- coh_coh_prompt = """ Evaluate the cohesion and coherence of essay based on the following aspects. DO NOT SCORE THEM:
175
 
176
- - paraphrasing ability
177
- - logical sequencing of ideas
178
- - use and accuracy of cohesive devices
179
- - existence of a clear central topic in each paragrpah
180
- - correct paragraphing
181
 
182
 
183
- Essay: {}
184
- """
185
- return self._get_completion(coh_coh_prompt.format(essay_content))
186
 
187
- def analyze_task_achievement(self, essay_question, essay_content):
188
- task_ach_prompt = """ Evaluate the Task Achievement factor of this IELTS essay based on the following question and essay. Do NOT provide scores. Just mention analysis and how it could be imporved.
189
 
190
- Question: {}
191
 
192
 
193
- Essay: {}
194
- """
195
- return self._get_completion(task_ach_prompt.format(essay_question, essay_content))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
 
 
 
 
 
 
 
197
  def _get_completion(self, prompt):
198
  response = self.client.chat.completions.create(
199
- messages=[{"role": "user", "content": prompt}],
200
- model=self.model
201
  )
202
- return response.choices[0].message.content
 
203
 
204
 
 
 
 
205
  class IELTSTask2ExerciseGenerator:
206
  def __init__(self):
207
- self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
208
- self.model = "llama3-8b-8192"
209
 
210
  def generate_grammar_exercises(self, grammar_analysis, essay_content):
211
- gram_exer_prompt = """ Based on the content of the grammar analysis provided in JSON format, make 4 different exercise types and provide 10 exercises for each. All the items should be in the context of the essay. Avoid providing exercises that are exact replica of the candidates' mistakes.
 
212
 
213
- Grammar Analysis: {}
214
-
215
- Essay: {}
216
- """
217
- return self._get_completion(gram_exer_prompt.format(grammar_analysis, essay_content))
 
 
 
 
 
 
 
 
218
 
219
  def generate_vocabulary_exercises(self, vocab_analysis):
220
- vocab_exer_prompt = """ Based on the lexical errors, provide exercises in the same context of the essay. Avoid providing exercises that are exact replica of the candidates' mistakes.
 
221
 
222
- Lexical Errors: {}
223
- """
224
- return self._get_completion(vocab_exer_prompt.format(vocab_analysis))
 
 
 
 
 
 
225
 
226
  def _get_completion(self, prompt):
227
  response = self.client.chat.completions.create(
 
228
  messages=[{"role": "user", "content": prompt}],
229
- model=self.model
230
  )
231
- return response.choices[0].message.content
232
 
233
 
 
 
 
 
234
  class IELTSTask1Evaluator:
235
  def __init__(self):
236
- self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
237
- self.text_model = "llama3-8b-8192"
238
- self.vision_model = "llama-3.2-11b-vision-preview"
239
 
240
- def encode_image(self, image_path):
241
- with open(image_path, "rb") as image_file:
242
- return base64.b64encode(image_file.read()).decode('utf-8')
243
 
 
 
 
244
  def analyze_graph(self, image_path, question):
245
- """Analyzes the graph/chart provided for Task 1 considering the question"""
246
- base64_image = self.encode_image(image_path)
247
  response = self.client.chat.completions.create(
 
248
  messages=[
249
  {
250
  "role": "user",
251
  "content": [
252
- {
253
- "type": "text",
254
- "text": f"""Given this IELTS Task 1 question: {question}
255
-
256
- Please analyze the graph/chart and:
257
- 1. List the key features that need to be described based on the question requirements
258
- 2. Identify the main trends, patterns, or comparisons required by the question
259
- 3. Note any significant data points that should be highlighted
260
- 4. Specify what should be included in the overview
261
- 5. Indicate what details should be included in the body paragraphs
262
-
263
- Provide your analysis in a structured format that can be used to evaluate student responses."""
264
- },
265
- {
266
- "type": "image_url",
267
- "image_url": {
268
- "url": f"data:image/jpeg;base64,{base64_image}",
269
- },
270
- },
271
- ],
272
  }
273
- ],
274
- model=self.vision_model,
275
  )
276
- return response.choices[0].message.content
277
-
 
 
 
278
  def evaluate_task1_response(self, question, graph_analysis, written_response):
279
- score_prompt = """Provide the scores for the following IELTS Task 1 essay based on the graph analysis provided and the written response. Score in terms of Task Achievement, Coherence and Cohesion, Lexical Resource, and Grammatical Range & Accuracy criteria. The scores should be in IELTS framework. Each criterion should be scored between 1 to 9 and should be integer. E.g. 5.5 is not acceptable! For each criteria that you score, provide 3 different comments using the band descriptors and by referencing both the key features of the graph and how well they were described in the response.
280
- IELTS Task 1 Band Descriptors:
281
-
282
- Band 9:
283
- Fully satisfies all requirements of the task
284
- Clearly presents a fully developed response
285
- Uses cohesion in such a way that it attracts no attention
286
- • Skilfully manages paragraphing
287
- Uses a wide range of vocabulary with very natural and sophisticated control of lexical features; rare minor errors occur only as 'slips'
288
- • Uses a wide range of structures with full flexibility and accuracy; rare minor errors occur only as 'slips'
289
-
290
- Band 8:
291
- • Covers all requirements of the task sufficiently
292
- • Presents, highlights and illustrates key features/bullet points clearly and appropriately
293
- • Sequences information and ideas logically
294
- • Manages all aspects of cohesion well
295
- Uses paragraphing sufficiently and appropriately
296
- • Uses a wide range of vocabulary fluently and flexibly to convey precise meanings
297
- Skilfully uses uncommon lexical items but there may be occasional inaccuracies in word choice and collocation
298
- Produces rare errors in spelling and/or word formation
299
- Uses a wide range of structures
300
- The majority of sentences are error-free
301
- Makes only very occasional errors or inappropriacies
302
-
303
- Band 7:
304
- • Covers the requirements of the task
305
- Presents a clear overview of main trends, differences or stages
306
- Clearly presents and highlights key features/bullet points but could be more fully extended
307
- Logically organises information and ideas; there is clear progression throughout
308
- Uses a range of cohesive devices appropriately although there may be some under-/over-use
309
- Uses a sufficient range of vocabulary to allow some flexibility and precision
310
- • Uses less common lexical items with some awareness of style and collocation
311
- • May produce occasional errors in word choice, spelling and/or word formation
312
- • Uses a variety of complex structures
313
- • Produces frequent error-free sentences
314
- Has good control of grammar and punctuation but may make a few errors
315
-
316
- Band 6:
317
- Addresses the requirements of the task
318
- Presents an overview with information appropriately selected
319
- • Presents and adequately highlights key features/bullet points but details may be irrelevant, inappropriate or inaccurate
320
- • Arranges information and ideas coherently and there is a clear overall progression
321
- Uses cohesive devices effectively, but cohesion within and/or between sentences may be faulty or mechanical
322
- May not always use referencing clearly or appropriately
323
- Uses an adequate range of vocabulary for the task
324
- Attempts to use less common vocabulary but with some inaccuracy
325
- • Makes some errors in spelling and/or word formation, but they do not impede communication
326
- • Uses a mix of simple and complex sentence forms
327
- • Makes some errors in grammar and punctuation but they rarely reduce communication
328
-
329
- Band 5:
330
- • Generally addresses the task; the format may be inappropriate in places
331
- Recounts detail mechanically with no clear overview; there may be no data to support the description
332
- Presents, but inadequately covers, key features/bullet points; there may be a tendency to focus on details
333
- Presents information with some organisation but there may be a lack of overall progression
334
- • Makes inadequate, inaccurate or over-use of cohesive devices
335
- • May be repetitive because of lack of referencing and substitution
336
- Uses a limited range of vocabulary, but this is minimally adequate for the task
337
- May make noticeable errors in spelling and/or word formation that may cause some difficulty for the reader
338
- Uses only a limited range of structures
339
- Attempts complex sentences but these tend to be less accurate than simple sentences
340
- • May make frequent grammatical errors and punctuation may be faulty; errors can cause some difficulty for the reader
341
-
342
- Band 4:
343
- • Attempts to address the task but does not cover all key features/bullet points; the format may be inappropriate
344
- • May confuse key features/bullet points with detail; parts may be unclear, irrelevant, repetitive or inaccurate
345
- • Presents information and ideas but these are not arranged coherently and there is no clear progression
346
- • Uses some basic cohesive devices but these may be inaccurate or repetitive
347
- • Uses only basic vocabulary which may be used repetitively or which may be inappropriate for the task
348
- • Has limited control of word formation and/or spelling; errors may cause strain for the reader
349
- Uses only a very limited range of structures with only rare use of subordinate clauses
350
- • Some structures are accurate but errors predominate, and punctuation is often faulty
351
-
352
- Band 3:
353
- Fails to address the task, which may have been completely misunderstood
354
- • Presents limited ideas which may be largely irrelevant/repetitive
355
- Does not organise ideas logically
356
- • May use a very limited range of cohesive devices, and those used may not indicate a logical relationship between ideas
357
- • Uses only a very limited range of words and expressions with very limited control of word formation and/or spelling
358
- • Errors may severely distort the message
359
- • Attempts sentence forms but errors in grammar and punctuation predominate and distort the meaning
360
-
361
- Band 2:
362
- • Answer is barely related to the task
363
- • Has very little control of organisational features
364
- • Uses an extremely limited range of vocabulary; essentially no control of word formation and/or spelling
365
- • Cannot use sentence forms except in memorised phrases
366
-
367
- Band 1:
368
- • Answer is completely unrelated to the task
369
- • Fails to communicate any message
370
- • Can only use a few isolated words
371
- • Cannot use sentence forms at all
372
-
373
- Band 0:
374
- • Does not attend
375
- • Does not attempt the task in any way
376
- • Writes a totally memorised response
377
- Question: {}
378
- Graph Analysis: {}
379
- Written Response: {}
380
- """
381
- return self._get_completion(score_prompt.format(question, graph_analysis, written_response))
382
-
383
- def analyze_grammar(self, written_response):
384
- gram_prompt = """Provide a grammatical analysis of the following Task 1 response with respect to describing trends, comparisons, and data. Provide all the grammars used and their accuracy percentage. E.g. "Past tense: 70%". Also provide comments for inaccurate structures. Focus especially on:
385
- - Tense usage for trends
386
- - Comparative structures
387
- - Passive voice
388
- - Articles with data
389
- - Prepositions with trends
390
-
391
- Response: {}
392
- """
393
- return self._get_completion(gram_prompt.format(written_response))
394
-
395
- def analyze_vocabulary(self, written_response):
396
- vocab_prompt = """Analyze the vocabulary used in this Task 1 response, focusing on:
397
- - Language for describing trends
398
- - Comparison vocabulary
399
- - Data reporting vocabulary
400
- - Graph/chart-specific terminology
401
- Provide suggestions for improvement with examples in similar contexts.
402
-
403
- Response: {}
404
- """
405
- return self._get_completion(vocab_prompt.format(written_response))
406
-
407
- def analyze_task_achievement(self, question, graph_analysis, written_response):
408
- task_ach_prompt = """Compare the key features identified in the graph analysis with how they were covered in the written response. Evaluate:
409
- - Overview presence and quality
410
- - Key feature selection
411
- - Data accuracy
412
- - Trend description completeness
413
- Do NOT provide scores, only analysis and suggestions for improvement.
414
-
415
- Question: {}
416
- Graph Analysis: {}
417
- Written Response: {}
418
- """
419
- return self._get_completion(task_ach_prompt.format(question, graph_analysis, written_response))
420
-
421
- def analyze_cohesion_coherence(self, written_response, graph_analysis):
422
- coh_coh_prompt = """Evaluate the cohesion and coherence of essay based on the following aspects. DO NOT SCORE THEM:
423
- - paraphrasing ability
424
- - logical sequencing of ideas
425
- - use and accuracy of cohesive devices
426
- - existence of a clear central topic in each paragraph
427
- - correct paragraphing
428
-
429
- Graph Analysis: {}
430
- Response: {}
431
- """
432
- return self._get_completion(coh_coh_prompt.format(graph_analysis, written_response))
433
-
434
  def _get_completion(self, prompt):
435
- response = self.client.chat.completions.create(
436
- messages=[{"role": "user", "content": prompt}],
437
- model=self.text_model
438
  )
439
- return response.choices[0].message.content
440
-
441
-
442
 
 
 
 
 
443
  class IELTSTask1ExerciseGenerator:
444
  def __init__(self):
445
- self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
446
- self.model = "llama3-8b-8192"
447
 
448
  def generate_grammar_exercises(self, grammar_analysis, essay_content):
449
- gram_exer_prompt = """ Based on the content of the grammar analysis provided in JSON format, make 4 different exercise types and provide 10 exercises for each. All the items should be in the context of the essay. Avoid providing exercises that are exact replica of the candidates' mistakes.
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
- Grammar Analysis: {}
452
- Essay: {}
453
- """
454
- return self._get_completion(gram_exer_prompt.format(grammar_analysis, essay_content))
455
-
456
  def generate_graph_description_exercises(self, graph_analysis):
457
- exercise_prompt = """Based on the graph analysis provided, create exercises to practice:
458
- 1. Writing overviews (1 exercise)
459
- 2. Describing trends (1 exercise)
460
- 3. Comparing data points (1 exercises)
461
- 4. Selecting key features (1 exercises)
462
 
463
- Keep all exercises relevant to the graph context.
 
 
 
464
 
465
- Graph Analysis: {}
 
466
  """
467
- return self._get_completion(exercise_prompt.format(graph_analysis))
468
 
469
  def generate_vocabulary_exercises(self, vocab_analysis):
470
- vocab_prompt = """Create targeted vocabulary exercises for Task 1 writing based on the vocabulary analysis. Include:
471
- 1. Trend description vocabulary
472
- 2. Comparison language
473
- 3. Data reporting phrases
474
- 4. Graph-specific terminology
 
 
475
 
476
- Analysis: {}
 
477
  """
478
- return self._get_completion(vocab_prompt.format(vocab_analysis))
479
 
480
  def _get_completion(self, prompt):
481
- response = self.client.chat.completions.create(
 
482
  messages=[{"role": "user", "content": prompt}],
483
- model=self.model
484
  )
485
- return response.choices[0].message.content
 
486
 
487
 
 
 
 
488
  def llm_responder_t1(image_path, question, written_response):
489
- task1_evaluator = IELTSTask1Evaluator()
490
- task1_generator = IELTSTask1ExerciseGenerator()
491
- graph_analysis = task1_evaluator.analyze_graph(image_path)
492
- general_analysis = task1_evaluator.evaluate_essay(question, graph_analysis, written_response)
493
- ga = task1_evaluator.analyze_grammar(written_response)
494
- lr = task1_evaluator.analyze_vocabulary(written_response)
495
- ta = task1_evaluator.analyze_task_achievement(question, graph_analysis, written_response)
496
- ga_exercise = task1_generator.generate_grammar_exercises(ga, written_response)
497
- lr_exercise = task1_generator.generate_vocabulary_exercises(lr)
498
- return ga, lr, cc, ta, ga_exercise, lr_exercise
 
 
 
 
499
 
500
 
501
  def llm_responder_t2(question, written_response):
502
- task2_evaluator = IELTSTask2Evaluator()
503
- task2_generator = IELTSTask2ExerciseGenerator()
504
- general_analysis = task2_evaluator.evaluate_essay(essay_question, essay_content)
505
- ga = task2_evaluator.analyze_grammar(written_response)
506
- lr = task2_evaluator.analyze_vocabulary(written_response)
507
- ta = task2_evaluator.analyze_task_achievement(essay_question, essay_content)
508
- ga_exercise = task2_generator.generate_grammar_exercises(ga, essay_content)
509
- lr_exercise = task2_generator.generate_vocabulary_exercises(lr)
510
-
511
- return ga, lr, cc, ta, ga_exercise, lr_exercise
 
 
 
 
1
+ # import os
2
+ # import base64
3
+ # from groq import Groq
4
+ # import streamlit as st
5
 
6
+ # class IELTSTask2Evaluator:
7
+ # def __init__(self):
8
+ # self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
9
+ # self.model = "openai/gpt-oss-20b"
10
 
11
+ # def evaluate_essay(self, essay_question, essay_content):
12
+ # score_prompt = """ Provide the scores for the following IELTS task 2 essay in terms of Cohesion & Coherence, Task Achievement, Lexical Resouces and Grammatical Range & Accuracy critera. The scores should be in IELTS framework. Each criterion should be scored between 1 to 9 and should be integer. E.g. 5.5 is not acceptable! For each criteria that you score also provide 3 different comments using the following band descriptors and by referncing the essay.
13
 
14
+ # IELTS Scoring band descriptor:
15
+ # Task Achievement: Score 9: • fully addresses all parts of the task
16
+ # • presents a fully developed position in answer to the question with relevant, fully extended and well supported ideas
17
 
18
+ # 8: • sufficiently addresses all parts of the task
19
+ # • presents a well-developed response to the question with relevant, extended and supported ideas
20
 
21
+ # 7: • addresses all parts of the task
22
+ # • presents a clear position throughout the response
23
+ # • presents, extends and supports main ideas, but there may be a tendency to over-generalise and/or supporting ideas may lack focus
24
 
25
+ # 6: • addresses all parts of the task although some parts may be more fully covered than others
26
+ # • presents a relevant position although the conclusions may become unclear or repetitive
27
+ # • presents relevant main ideas but some may be inadequately developed/unclear
28
 
29
+ # 5: • addresses the task only partially; the format may be inappropriate in places
30
+ # • expresses a position but the development is not always clear and there may be no conclusions drawn
31
+ # • presents some main ideas but these are limited and not sufficiently developed; there may be irrelevant detail
32
 
33
+ # 4: • responds to the task only in a minimal way or the answer is tangential; the format may be inappropriate
34
+ # • presents a position but this is unclear
35
+ # • presents some main ideas but these are difficult to identify and may be repetitive, irrelevant or not well supported
36
 
37
+ # 3: • does not adequately address any part of the task
38
+ # • does not express a clear position
39
+ # • presents few ideas, which are largely undeveloped or irrelevant
40
 
41
+ # 2: • barely responds to the task
42
+ # • does not express a position
43
+ # • may attempt to present one or two ideas but there is no development
44
+ # 1: • answer is completely unrelated to the task
45
 
46
+ # Cohesion and Coherence: Score 9: • uses cohesion in such a way that it attracts no attention
47
+ # • skilfully manages paragraphing
48
 
49
 
50
+ # Score 8: • sequences information and ideas logically
51
+ # • manages all aspects of cohesion well
52
+ # • uses paragraphing sufficiently and appropriately
53
 
54
+ # Score 7: • logically organises information and ideas; there is clear progression throughout
55
+ # • uses a range of cohesive devices appropriately although there may be some under-/over-use
56
+ # • presents a clear central topic within each paragraph
57
 
58
+ # Score 6: • arranges information and ideas coherently and there is a clear overall progression
59
+ # • uses cohesive devices effectively, but cohesion within and/or between sentences may be faulty or mechanical
60
+ # • may not always use referencing clearly or appropriately
61
+ # • uses paragraphing, but not always logically
62
 
63
+ # Score 5: • presents information with some organisation but there may be a lack of overall progression
64
+ # • makes inadequate, inaccurate or over-use of cohesive devices
65
+ # • may be repetitive because of lack of referencing and substitution
66
+ # • may not write in paragraphs, or paragraphing may be inadequate
67
 
68
+ # Score 4: • presents information and ideas but these are not arranged coherently and there is no clear progression in the response
69
+ # • uses some basic cohesive devices but these may be inaccurate or repetitive
70
+ # • may not write in paragraphs or their use may be confusing
71
 
72
+ # Score 3: • does not organise ideas logically
73
+ # • may use a very limited range of cohesive devices, and those used may not indicate a logical relationship between ideas
74
 
75
+ # Score 2: • has very little control of organisational features
76
 
77
+ # Score 1: • fails to communicate any message
78
 
79
 
80
+ # Lexical Resouce: Score 9: • uses a wide range of vocabulary with very natural and sophisticated control of lexical features; rare minor errors occur only as 'slips'
81
 
82
 
83
+ # Score 8: • uses a wide range of vocabulary fluently and flexibly to convey precise meanings
84
+ # • skilfully uses uncommon lexical items but there may be occasional inaccuracies in word choice and collocation
85
+ # • produces rare errors in spelling and/or word formation
86
 
87
 
88
 
89
+ # Score 7: • uses a sufficient range of vocabulary to allow some flexibility and precision
90
+ # • uses less common lexical items with some awareness of style and collocation
91
+ # • may produce occasional errors in word choice, spelling and/or word formation
92
 
93
 
94
+ # Score 6: • uses an adequate range of vocabulary for the task
95
+ # • attempts to use less common vocabulary but with some inaccuracy
96
+ # • makes some errors in spelling and/or word formation, but they do not impede communication
97
 
98
 
99
+ # Score 5: • uses a limited range of vocabulary, but this is minimally adequate for the task
100
+ # • may make noticeable errors in spelling and/or word formation that may cause some difficulty for the reader
101
 
102
 
103
+ # Score 4: • uses only basic vocabulary which may be used repetitively or which may be inappropriate for the task
104
+ # • has limited control of word formation and/or spelling; errors may cause strain for the reader
105
 
106
 
107
+ # Score 3: • uses only a very limited range of words and expressions with very limited control of word formation and/or spelling
108
+ # • errors may severely distort the message
109
 
110
 
111
+ # Score 2: • uses an extremely limited range of vocabulary; essentially no control of word formation and/or spelling
112
 
113
 
114
+ # Score 1: • can only use a few isolated words
115
 
116
 
117
 
118
 
119
 
120
+ # Grammatical Range and Accuracy: Score 9: • uses a wide range of structures with full flexibility and accuracy; rare minor errors occur only as 'slips'
121
 
122
 
123
+ # Score 8: • uses a wide range of structures
124
+ # • the majority of sentences are error-free
125
+ # • makes only very occasional errors or inappropriacies
126
 
127
 
128
+ # Score 7: • uses a variety of complex structures
129
+ # • produces frequent error-free sentences
130
+ # • has good control of grammar and punctuation but may make a few errors
131
 
132
 
133
+ # Score 6: • uses a mix of simple and complex sentence forms
134
+ # • makes some errors in grammar and punctuation but they rarely reduce communication
135
 
136
 
137
+ # Score 5: • uses only a limited range of structures
138
+ # • attempts complex sentences but these tend to be less accurate than simple sentences
139
+ # • may make frequent grammatical errors and punctuation may be faulty; errors can cause some difficulty for the reader
140
 
141
 
142
+ # Score 4: • uses only a very limited range of structures with only rare use of subordinate clauses
143
+ # • some structures are accurate but errors predominate, and punctuation is often faulty
144
 
145
 
146
+ # Score 3: • attempts sentence forms but errors in grammar and punctuation predominate and distort the meaning
147
 
148
 
149
+ # Score 2: • cannot use sentence forms except in memorised phrases
150
 
151
 
152
+ # Score 1: • cannot use sentence forms at all
153
 
154
+ # Essay Question: {}
155
+ # Essay: {}
156
+ # """
157
+ # return self._get_completion(score_prompt.format(essay_question, essay_content))
158
 
159
+ # def analyze_grammar(self, essay_content):
160
+ # gram_prompt = """ Provide a grammatical analysis of the following essay with respect to the content a B1 learner must know. Provide all the grammar structures used and their accuracy percentage. E.g. "Simple Present: 70%". Also provide comments for inaccurate structures. Don't give me any exercises.
161
 
162
+ # Essay: {}
163
+ # """
164
+ # return self._get_completion(gram_prompt.format(essay_content))
165
 
166
+ # def analyze_vocabulary(self, essay_content):
167
+ # vocab_prompt = """ Provide the lexical errors and provide their correct forms. For each vocabulary item related to the context of the essay that you think could be improved, give suggestions. Provide full sentence examples for these suggestions.
168
 
169
+ # Essay: {}
170
+ # """
171
+ # return self._get_completion(vocab_prompt.format(essay_content))
172
 
173
+ # def analyze_cohesion_coherence(self, essay_content):
174
+ # coh_coh_prompt = """ Evaluate the cohesion and coherence of essay based on the following aspects. DO NOT SCORE THEM:
175
 
176
+ # - paraphrasing ability
177
+ # - logical sequencing of ideas
178
+ # - use and accuracy of cohesive devices
179
+ # - existence of a clear central topic in each paragraph
180
+ # - correct paragraphing
181
 
182
 
183
+ # Essay: {}
184
+ # """
185
+ # return self._get_completion(coh_coh_prompt.format(essay_content))
186
 
187
+ # def analyze_task_achievement(self, essay_question, essay_content):
188
+ # task_ach_prompt = """ Evaluate the Task Achievement factor of this IELTS essay based on the following question and essay. Do NOT provide scores. Just mention analysis and how it could be improved.
189
 
190
+ # Question: {}
191
 
192
 
193
+ # Essay: {}
194
+ # """
195
+ # return self._get_completion(task_ach_prompt.format(essay_question, essay_content))
196
+
197
+ # def _get_completion(self, prompt):
198
+ # response = self.client.chat.completions.create(
199
+ # messages=[{"role": "user", "content": prompt}],
200
+ # model=self.model
201
+ # )
202
+ # return response.choices[0].message.content
203
+
204
+
205
+ # class IELTSTask2ExerciseGenerator:
206
+ # def __init__(self):
207
+ # self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
208
+ # self.model = "llama3-8b-8192"
209
+
210
+ # def generate_grammar_exercises(self, grammar_analysis, essay_content):
211
+ # gram_exer_prompt = """ Based on the content of the grammar analysis provided in JSON format, make 4 different exercise types and provide 10 exercises for each. All the items should be in the context of the essay. Avoid providing exercises that are exact replica of the candidates' mistakes.
212
+
213
+ # Grammar Analysis: {}
214
+
215
+ # Essay: {}
216
+ # """
217
+ # return self._get_completion(gram_exer_prompt.format(grammar_analysis, essay_content))
218
+
219
+ # def generate_vocabulary_exercises(self, vocab_analysis):
220
+ # vocab_exer_prompt = """ Based on the lexical errors, provide exercises in the same context of the essay. Avoid providing exercises that are exact replica of the candidates' mistakes.
221
+
222
+ # Lexical Errors: {}
223
+ # """
224
+ # return self._get_completion(vocab_exer_prompt.format(vocab_analysis))
225
+
226
+ # def _get_completion(self, prompt):
227
+ # response = self.client.chat.completions.create(
228
+ # messages=[{"role": "user", "content": prompt}],
229
+ # model=self.model
230
+ # )
231
+ # return response.choices[0].message.content
232
+
233
+
234
+ # class IELTSTask1Evaluator:
235
+ # def __init__(self):
236
+ # self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
237
+ # self.text_model = "llama3-8b-8192"
238
+ # self.vision_model = "llama-3.2-11b-vision-preview"
239
+
240
+ # def encode_image(self, image_path):
241
+ # with open(image_path, "rb") as image_file:
242
+ # return base64.b64encode(image_file.read()).decode('utf-8')
243
+
244
+ # def analyze_graph(self, image_path, question):
245
+ # """Analyzes the graph/chart provided for Task 1 considering the question"""
246
+ # base64_image = self.encode_image(image_path)
247
+ # response = self.client.chat.completions.create(
248
+ # messages=[
249
+ # {
250
+ # "role": "user",
251
+ # "content": [
252
+ # {
253
+ # "type": "text",
254
+ # "text": f"""Given this IELTS Task 1 question: {question}
255
+
256
+ # Please analyze the graph/chart and:
257
+ # 1. List the key features that need to be described based on the question requirements
258
+ # 2. Identify the main trends, patterns, or comparisons required by the question
259
+ # 3. Note any significant data points that should be highlighted
260
+ # 4. Specify what should be included in the overview
261
+ # 5. Indicate what details should be included in the body paragraphs
262
+
263
+ # Provide your analysis in a structured format that can be used to evaluate student responses."""
264
+ # },
265
+ # {
266
+ # "type": "image_url",
267
+ # "image_url": {
268
+ # "url": f"data:image/jpeg;base64,{base64_image}",
269
+ # },
270
+ # },
271
+ # ],
272
+ # }
273
+ # ],
274
+ # model=self.vision_model,
275
+ # )
276
+ # return response.choices[0].message.content
277
+
278
+ # def evaluate_task1_response(self, question, graph_analysis, written_response):
279
+ # score_prompt = """Provide the scores for the following IELTS Task 1 essay based on the graph analysis provided and the written response. Score in terms of Task Achievement, Coherence and Cohesion, Lexical Resource, and Grammatical Range & Accuracy criteria. The scores should be in IELTS framework. Each criterion should be scored between 1 to 9 and should be integer. E.g. 5.5 is not acceptable! For each criteria that you score, provide 3 different comments using the band descriptors and by referencing both the key features of the graph and how well they were described in the response.
280
+ # IELTS Task 1 Band Descriptors:
281
+
282
+ # Band 9:
283
+ # • Fully satisfies all requirements of the task
284
+ # • Clearly presents a fully developed response
285
+ # • Uses cohesion in such a way that it attracts no attention
286
+ # • Skilfully manages paragraphing
287
+ # • Uses a wide range of vocabulary with very natural and sophisticated control of lexical features; rare minor errors occur only as 'slips'
288
+ # • Uses a wide range of structures with full flexibility and accuracy; rare minor errors occur only as 'slips'
289
+
290
+ # Band 8:
291
+ # • Covers all requirements of the task sufficiently
292
+ # • Presents, highlights and illustrates key features/bullet points clearly and appropriately
293
+ # • Sequences information and ideas logically
294
+ # • Manages all aspects of cohesion well
295
+ # • Uses paragraphing sufficiently and appropriately
296
+ # • Uses a wide range of vocabulary fluently and flexibly to convey precise meanings
297
+ # • Skilfully uses uncommon lexical items but there may be occasional inaccuracies in word choice and collocation
298
+ # • Produces rare errors in spelling and/or word formation
299
+ # • Uses a wide range of structures
300
+ # • The majority of sentences are error-free
301
+ # • Makes only very occasional errors or inappropriacies
302
+
303
+ # Band 7:
304
+ # • Covers the requirements of the task
305
+ # • Presents a clear overview of main trends, differences or stages
306
+ # • Clearly presents and highlights key features/bullet points but could be more fully extended
307
+ # • Logically organises information and ideas; there is clear progression throughout
308
+ # • Uses a range of cohesive devices appropriately although there may be some under-/over-use
309
+ # • Uses a sufficient range of vocabulary to allow some flexibility and precision
310
+ # • Uses less common lexical items with some awareness of style and collocation
311
+ # • May produce occasional errors in word choice, spelling and/or word formation
312
+ # • Uses a variety of complex structures
313
+ # • Produces frequent error-free sentences
314
+ # • Has good control of grammar and punctuation but may make a few errors
315
+
316
+ # Band 6:
317
+ # • Addresses the requirements of the task
318
+ # • Presents an overview with information appropriately selected
319
+ # • Presents and adequately highlights key features/bullet points but details may be irrelevant, inappropriate or inaccurate
320
+ # • Arranges information and ideas coherently and there is a clear overall progression
321
+ # • Uses cohesive devices effectively, but cohesion within and/or between sentences may be faulty or mechanical
322
+ # • May not always use referencing clearly or appropriately
323
+ # • Uses an adequate range of vocabulary for the task
324
+ # • Attempts to use less common vocabulary but with some inaccuracy
325
+ # • Makes some errors in spelling and/or word formation, but they do not impede communication
326
+ # • Uses a mix of simple and complex sentence forms
327
+ # • Makes some errors in grammar and punctuation but they rarely reduce communication
328
+
329
+ # Band 5:
330
+ # • Generally addresses the task; the format may be inappropriate in places
331
+ # • Recounts detail mechanically with no clear overview; there may be no data to support the description
332
+ # • Presents, but inadequately covers, key features/bullet points; there may be a tendency to focus on details
333
+ # • Presents information with some organisation but there may be a lack of overall progression
334
+ # • Makes inadequate, inaccurate or over-use of cohesive devices
335
+ # • May be repetitive because of lack of referencing and substitution
336
+ # • Uses a limited range of vocabulary, but this is minimally adequate for the task
337
+ # • May make noticeable errors in spelling and/or word formation that may cause some difficulty for the reader
338
+ # • Uses only a limited range of structures
339
+ # • Attempts complex sentences but these tend to be less accurate than simple sentences
340
+ # • May make frequent grammatical errors and punctuation may be faulty; errors can cause some difficulty for the reader
341
+
342
+ # Band 4:
343
+ # • Attempts to address the task but does not cover all key features/bullet points; the format may be inappropriate
344
+ # • May confuse key features/bullet points with detail; parts may be unclear, irrelevant, repetitive or inaccurate
345
+ # • Presents information and ideas but these are not arranged coherently and there is no clear progression
346
+ # • Uses some basic cohesive devices but these may be inaccurate or repetitive
347
+ # • Uses only basic vocabulary which may be used repetitively or which may be inappropriate for the task
348
+ # • Has limited control of word formation and/or spelling; errors may cause strain for the reader
349
+ # • Uses only a very limited range of structures with only rare use of subordinate clauses
350
+ # • Some structures are accurate but errors predominate, and punctuation is often faulty
351
+
352
+ # Band 3:
353
+ # • Fails to address the task, which may have been completely misunderstood
354
+ # • Presents limited ideas which may be largely irrelevant/repetitive
355
+ # • Does not organise ideas logically
356
+ # • May use a very limited range of cohesive devices, and those used may not indicate a logical relationship between ideas
357
+ # • Uses only a very limited range of words and expressions with very limited control of word formation and/or spelling
358
+ # • Errors may severely distort the message
359
+ # • Attempts sentence forms but errors in grammar and punctuation predominate and distort the meaning
360
+
361
+ # Band 2:
362
+ # • Answer is barely related to the task
363
+ # • Has very little control of organisational features
364
+ # • Uses an extremely limited range of vocabulary; essentially no control of word formation and/or spelling
365
+ # • Cannot use sentence forms except in memorised phrases
366
+
367
+ # Band 1:
368
+ # • Answer is completely unrelated to the task
369
+ # • Fails to communicate any message
370
+ # • Can only use a few isolated words
371
+ # • Cannot use sentence forms at all
372
+
373
+ # Band 0:
374
+ # • Does not attend
375
+ # • Does not attempt the task in any way
376
+ # • Writes a totally memorised response
377
+ # Question: {}
378
+ # Graph Analysis: {}
379
+ # Written Response: {}
380
+ # """
381
+ # return self._get_completion(score_prompt.format(question, graph_analysis, written_response))
382
+
383
+ # def analyze_grammar(self, written_response):
384
+ # gram_prompt = """Provide a grammatical analysis of the following Task 1 response with respect to describing trends, comparisons, and data. Provide all the grammars used and their accuracy percentage. E.g. "Past tense: 70%". Also provide comments for inaccurate structures. Focus especially on:
385
+ # - Tense usage for trends
386
+ # - Comparative structures
387
+ # - Passive voice
388
+ # - Articles with data
389
+ # - Prepositions with trends
390
+
391
+ # Response: {}
392
+ # """
393
+ # return self._get_completion(gram_prompt.format(written_response))
394
+
395
+ # def analyze_vocabulary(self, written_response):
396
+ # vocab_prompt = """Analyze the vocabulary used in this Task 1 response, focusing on:
397
+ # - Language for describing trends
398
+ # - Comparison vocabulary
399
+ # - Data reporting vocabulary
400
+ # - Graph/chart-specific terminology
401
+ # Provide suggestions for improvement with examples in similar contexts.
402
+
403
+ # Response: {}
404
+ # """
405
+ # return self._get_completion(vocab_prompt.format(written_response))
406
+
407
+ # def analyze_task_achievement(self, question, graph_analysis, written_response):
408
+ # task_ach_prompt = """Compare the key features identified in the graph analysis with how they were covered in the written response. Evaluate:
409
+ # - Overview presence and quality
410
+ # - Key feature selection
411
+ # - Data accuracy
412
+ # - Trend description completeness
413
+ # Do NOT provide scores, only analysis and suggestions for improvement.
414
+
415
+ # Question: {}
416
+ # Graph Analysis: {}
417
+ # Written Response: {}
418
+ # """
419
+ # return self._get_completion(task_ach_prompt.format(question, graph_analysis, written_response))
420
+
421
+ # def analyze_cohesion_coherence(self, written_response, graph_analysis):
422
+ # coh_coh_prompt = """Evaluate the cohesion and coherence of essay based on the following aspects. DO NOT SCORE THEM:
423
+ # - paraphrasing ability
424
+ # - logical sequencing of ideas
425
+ # - use and accuracy of cohesive devices
426
+ # - existence of a clear central topic in each paragraph
427
+ # - correct paragraphing
428
+
429
+ # Graph Analysis: {}
430
+ # Response: {}
431
+ # """
432
+ # return self._get_completion(coh_coh_prompt.format(graph_analysis, written_response))
433
+
434
+ # def _get_completion(self, prompt):
435
+ # response = self.client.chat.completions.create(
436
+ # messages=[{"role": "user", "content": prompt}],
437
+ # model=self.text_model
438
+ # )
439
+ # return response.choices[0].message.content
440
+
441
+
442
+
443
+ # class IELTSTask1ExerciseGenerator:
444
+ # def __init__(self):
445
+ # self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
446
+ # self.model = "llama3-8b-8192"
447
+
448
+ # def generate_grammar_exercises(self, grammar_analysis, essay_content):
449
+ # gram_exer_prompt = """ Based on the content of the grammar analysis provided in JSON format, make 4 different exercise types and provide 10 exercises for each. All the items should be in the context of the essay. Avoid providing exercises that are exact replica of the candidates' mistakes.
450
+
451
+ # Grammar Analysis: {}
452
+ # Essay: {}
453
+ # """
454
+ # return self._get_completion(gram_exer_prompt.format(grammar_analysis, essay_content))
455
+
456
+ # def generate_graph_description_exercises(self, graph_analysis):
457
+ # exercise_prompt = """Based on the graph analysis provided, create exercises to practice:
458
+ # 1. Writing overviews (1 exercise)
459
+ # 2. Describing trends (1 exercise)
460
+ # 3. Comparing data points (1 exercise)
461
+ # 4. Selecting key features (1 exercise)
462
+
463
+ # Keep all exercises relevant to the graph context.
464
+
465
+ # Graph Analysis: {}
466
+ # """
467
+ # return self._get_completion(exercise_prompt.format(graph_analysis))
468
+
469
+ # def generate_vocabulary_exercises(self, vocab_analysis):
470
+ # vocab_prompt = """Create targeted vocabulary exercises for Task 1 writing based on the vocabulary analysis. Include:
471
+ # 1. Trend description vocabulary
472
+ # 2. Comparison language
473
+ # 3. Data reporting phrases
474
+ # 4. Graph-specific terminology
475
+
476
+ # Analysis: {}
477
+ # """
478
+ # return self._get_completion(vocab_prompt.format(vocab_analysis))
479
+
480
+ # def _get_completion(self, prompt):
481
+ # response = self.client.chat.completions.create(
482
+ # messages=[{"role": "user", "content": prompt}],
483
+ # model=self.model
484
+ # )
485
+ # return response.choices[0].message.content
486
+
487
+
488
+ # def llm_responder_t1(image_path, question, written_response):
489
+ # task1_evaluator = IELTSTask1Evaluator()
490
+ # task1_generator = IELTSTask1ExerciseGenerator()
491
+ # graph_analysis = task1_evaluator.analyze_graph(image_path)
492
+ # general_analysis = task1_evaluator.evaluate_essay(question, graph_analysis, written_response)
493
+ # ga = task1_evaluator.analyze_grammar(written_response)
494
+ # lr = task1_evaluator.analyze_vocabulary(written_response)
495
+ # ta = task1_evaluator.analyze_task_achievement(question, graph_analysis, written_response)
496
+ # ga_exercise = task1_generator.generate_grammar_exercises(ga, written_response)
497
+ # lr_exercise = task1_generator.generate_vocabulary_exercises(lr)
498
+ # return ga, lr, cc, ta, ga_exercise, lr_exercise
499
+
500
+
501
+ # def llm_responder_t2(question, written_response):
502
+ # task2_evaluator = IELTSTask2Evaluator()
503
+ # task2_generator = IELTSTask2ExerciseGenerator()
504
+ # general_analysis = task2_evaluator.evaluate_essay(essay_question, essay_content)
505
+ # ga = task2_evaluator.analyze_grammar(written_response)
506
+ # lr = task2_evaluator.analyze_vocabulary(written_response)
507
+ # ta = task2_evaluator.analyze_task_achievement(essay_question, essay_content)
508
+ # ga_exercise = task2_generator.generate_grammar_exercises(ga, essay_content)
509
+ # lr_exercise = task2_generator.generate_vocabulary_exercises(lr)
510
+
511
+ # return ga, lr, cc, ta, ga_exercise, lr_exercise
512
+
513
+ import os
514
+ import base64
515
+ from groq import Groq
516
+
517
+ # -------------------------------
518
+ # GLOBAL MODEL SETTINGS
519
+ # -------------------------------
520
# Groq model identifiers used by the evaluator and exercise-generator classes.
# Each one can be overridden through an environment variable so a model can be
# swapped without a code change; when the variable is unset or empty the
# original hard-coded default is used.
# NOTE(review): Groq retires model IDs periodically -- confirm these defaults
# are still being served before deploying.
TEXT_MODEL = os.getenv("GROQ_TEXT_MODEL") or "llama-3.1-70b-versatile"
FAST_MODEL = os.getenv("GROQ_FAST_MODEL") or "llama3-8b-8192"
VISION_MODEL = os.getenv("GROQ_VISION_MODEL") or "llama-3.2-11b-vision-preview"
523
+
524
+
525
# ================================================================
# =============== TASK 2 EVALUATOR ===========================
# ================================================================
class IELTSTask2Evaluator:
    """Evaluate IELTS Writing Task 2 essays through the Groq chat API.

    Every public method builds one prompt, sends it as a single user message
    to ``self.model`` and returns the text of the first choice in the reply.
    """

    def __init__(self):
        """Create the Groq client from ``GROQ_API_KEY`` and select the model."""
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        self.model = TEXT_MODEL

    # -------------------------------
    # SCORE ESSAY
    # -------------------------------
    def evaluate_essay(self, essay_question, essay_content):
        """Ask the model for integer band scores and comments per criterion.

        Args:
            essay_question: The Task 2 question the essay answers.
            essay_content: The candidate's essay text.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        You are an IELTS examiner. Evaluate this Task 2 essay.

        Strict rules:
        - Score each criterion (TA, CC, LR, GRA) from **1 to 9**, only integers.
        - Provide **3 examiner-style comments per criterion**.
        - Base the evaluation strictly on IELTS Writing Task 2 band descriptors.

        Essay Question: {essay_question}

        Essay:
        {essay_content}
        """
        return self._get_completion(prompt)

    # -------------------------------
    # GRAMMAR ANALYSIS
    # -------------------------------
    def analyze_grammar(self, essay_content):
        """Ask the model for a grammar analysis of the essay.

        Args:
            essay_content: The candidate's essay text.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Provide a full grammar analysis for a B1–C1 learner.

        Include:
        - Grammar structures used
        - Accuracy % (e.g., “Present Simple: 70%”)
        - Explanations for inaccurate structures

        Essay:
        {essay_content}
        """
        return self._get_completion(prompt)

    # -------------------------------
    # VOCABULARY ANALYSIS
    # -------------------------------
    def analyze_vocabulary(self, essay_content):
        """Ask the model for lexical errors, corrections and alternatives.

        Args:
            essay_content: The candidate's essay text.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Provide a vocabulary analysis of the essay:

        Include:
        - Lexical errors + corrections
        - Stronger alternatives for weak vocabulary
        - Full sentence examples

        Essay:
        {essay_content}
        """
        return self._get_completion(prompt)

    # -------------------------------
    # COHESION & COHERENCE
    # -------------------------------
    def analyze_cohesion_coherence(self, essay_content):
        """Ask the model for an unscored cohesion and coherence review.

        Args:
            essay_content: The candidate's essay text.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Evaluate cohesion & coherence (NO SCORES):

        Criteria:
        - Paraphrasing quality
        - Logical sequencing
        - Cohesive devices use
        - Topic clarity per paragraph
        - Paragraph structure quality

        Essay:
        {essay_content}
        """
        return self._get_completion(prompt)

    # -------------------------------
    # TASK ACHIEVEMENT
    # -------------------------------
    def analyze_task_achievement(self, essay_question, essay_content):
        """Ask the model for an unscored Task Achievement review.

        Args:
            essay_question: The Task 2 question the essay answers.
            essay_content: The candidate's essay text.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Analyze Task Achievement for this Task 2 essay (NO SCORES):

        Explain:
        - Whether all parts of the question are addressed
        - Idea development
        - Position clarity
        - Suggestions for improvement

        Question: {essay_question}

        Essay: {essay_content}
        """
        return self._get_completion(prompt)

    # -------------------------------
    # INTERNAL CALL
    # -------------------------------
    def _get_completion(self, prompt):
        """Send ``prompt`` as one user message and return the reply text.

        Args:
            prompt: The full prompt string.

        Returns:
            The ``content`` attribute of the first choice's message.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
        )
        # The SDK returns a message object, not a dict: subscripting it with
        # ["content"] raises TypeError, so read the attribute instead.
        return response.choices[0].message.content
633
+
634
 
635
 
636
# ================================================================
# =============== TASK 2 EXERCISE GENERATOR ==================
# ================================================================
class IELTSTask2ExerciseGenerator:
    """Generate follow-up practice exercises for Task 2 via the Groq chat API.

    Each public method turns an earlier analysis into one prompt, sends it to
    ``self.model`` and returns the text of the first choice in the reply.
    """

    def __init__(self):
        """Create the Groq client from ``GROQ_API_KEY`` and select the model."""
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        self.model = FAST_MODEL

    def generate_grammar_exercises(self, grammar_analysis, essay_content):
        """Ask the model for grammar exercises tied to the essay's topic.

        Args:
            grammar_analysis: Text of a previous grammar analysis.
            essay_content: The candidate's essay text.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Create grammar exercises based on this grammar analysis.

        Rules:
        - 4 exercise types
        - 10 items each
        - All exercises must use the essay’s topic
        - Do NOT copy student mistakes

        Grammar Analysis:
        {grammar_analysis}

        Essay:
        {essay_content}
        """
        return self._get_completion(prompt)

    def generate_vocabulary_exercises(self, vocab_analysis):
        """Ask the model for vocabulary exercises based on a lexical analysis.

        Args:
            vocab_analysis: Text of a previous vocabulary analysis.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Create vocabulary practice exercises based on these lexical issues.

        Include:
        - Reformulation
        - Synonyms
        - Contextual rewriting

        Vocabulary Analysis:
        {vocab_analysis}
        """
        return self._get_completion(prompt)

    def _get_completion(self, prompt):
        """Send ``prompt`` as one user message and return the reply text.

        Args:
            prompt: The full prompt string.

        Returns:
            The ``content`` attribute of the first choice's message.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
        )
        # The SDK returns a message object, not a dict: subscripting it with
        # ["content"] raises TypeError, so read the attribute instead.
        return response.choices[0].message.content
682
 
683
 
684
+
685
# ================================================================
# =============== TASK 1 EVALUATOR ===========================
# ================================================================
class IELTSTask1Evaluator:
    """Evaluate IELTS Writing Task 1 responses through the Groq chat API.

    The graph image is first described by ``self.vision_model``; all other
    methods send a text prompt to ``self.text_model``. Every method returns
    the text of the first choice in the reply.
    """

    def __init__(self):
        """Create the Groq client from ``GROQ_API_KEY`` and select the models."""
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        self.text_model = TEXT_MODEL
        self.vision_model = VISION_MODEL

    def encode_image(self, path):
        """Return the file at ``path`` as a base64-encoded string.

        Args:
            path: Filesystem path of the image to read.

        Returns:
            The file's bytes, base64-encoded and decoded to ``str``.
        """
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode()

    # -------------------------------
    # GRAPH ANALYSIS (VISION)
    # -------------------------------
    def analyze_graph(self, image_path, question):
        """Ask the vision model to analyze the Task 1 graph image.

        Args:
            image_path: Filesystem path of the graph/chart image.
            question: The Task 1 question that accompanies the graph.

        Returns:
            The model's reply text.
        """
        base64_img = self.encode_image(image_path)

        response = self.client.chat.completions.create(
            model=self.vision_model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Analyze this IELTS Task 1 graph. Question: {question}",
                        },
                        {
                            "type": "image_url",
                            # NOTE(review): the MIME type is fixed to JPEG
                            # whatever the file actually is -- confirm callers
                            # only pass JPEG images or that the API tolerates
                            # a mismatch.
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_img}"},
                        },
                    ],
                }
            ],
        )
        # The SDK returns a message object, not a dict: subscripting it with
        # ["content"] raises TypeError, so read the attribute instead.
        return response.choices[0].message.content

    # -------------------------------
    # TASK 1 FULL EVALUATION
    # -------------------------------
    def evaluate_task1_response(self, question, graph_analysis, written_response):
        """Ask the model for integer band scores and comments per criterion.

        Args:
            question: The Task 1 question.
            graph_analysis: Text produced by ``analyze_graph``.
            written_response: The candidate's written response.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Evaluate this IELTS Task 1 response.

        Provide:
        - Integer band scores (1–9) for TA, CC, LR, GRA
        - 3 comments per criterion
        - Compare student writing to the graph analysis

        Question: {question}

        Graph Analysis:
        {graph_analysis}

        Response:
        {written_response}
        """
        return self._get_completion(prompt)

    # -------------------------------
    # OTHER ANALYSES (Grammar, Vocabulary, TA, CC)
    # -------------------------------
    def analyze_grammar(self, text):
        """Ask the model for a grammar analysis of a Task 1 response.

        Args:
            text: The candidate's written response.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Analyze grammar for Task 1 response:

        Focus:
        - Tenses for trends
        - Comparative structures
        - Passive forms
        - Prepositions for charts
        - Numeric language

        Response:
        {text}
        """
        return self._get_completion(prompt)

    def analyze_vocabulary(self, text):
        """Ask the model for a vocabulary analysis of a Task 1 response.

        Args:
            text: The candidate's written response.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Analyze vocabulary for Task 1:

        Include:
        - Trend vocabulary
        - Comparison vocabulary
        - Chart terminology
        - Better synonyms

        Response:
        {text}
        """
        return self._get_completion(prompt)

    def analyze_task_achievement(self, question, graph_analysis, text):
        """Ask the model for an unscored Task Achievement review.

        Args:
            question: The Task 1 question.
            graph_analysis: Text produced by ``analyze_graph``.
            text: The candidate's written response.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Evaluate Task Achievement (NO SCORES):

        Compare:
        - Key features vs graph analysis
        - Overview quality
        - Trend accuracy
        - Detail selection

        Question: {question}

        Graph:
        {graph_analysis}

        Response:
        {text}
        """
        return self._get_completion(prompt)

    def analyze_cohesion_coherence(self, text, graph_analysis):
        """Ask the model for an unscored cohesion and coherence review.

        Args:
            text: The candidate's written response.
            graph_analysis: Text produced by ``analyze_graph``.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Evaluate Cohesion & Coherence (NO SCORES):

        Graph Reference:
        {graph_analysis}

        Response:
        {text}
        """
        return self._get_completion(prompt)

    def _get_completion(self, prompt):
        """Send ``prompt`` to the text model and return the reply text.

        Args:
            prompt: The full prompt string.

        Returns:
            The ``content`` attribute of the first choice's message.
        """
        resp = self.client.chat.completions.create(
            model=self.text_model,
            messages=[{"role": "user", "content": prompt}],
        )
        # Attribute access, not ["content"]: the message is an object, not a dict.
        return resp.choices[0].message.content
814
+
 
815
 
816
+
817
# ================================================================
# =============== TASK 1 EXERCISE GENERATOR ==================
# ================================================================
class IELTSTask1ExerciseGenerator:
    """Generate follow-up practice exercises for Task 1 via the Groq chat API.

    Each public method turns an earlier analysis into one prompt, sends it to
    ``self.model`` and returns the text of the first choice in the reply.
    """

    def __init__(self):
        """Create the Groq client from ``GROQ_API_KEY`` and select the model."""
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        self.model = FAST_MODEL

    def generate_grammar_exercises(self, grammar_analysis, essay_content):
        """Ask the model for grammar exercises matching the response's topic.

        Args:
            grammar_analysis: Text of a previous grammar analysis.
            essay_content: The candidate's written response.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Generate Task 1 grammar exercises:
        - 4 exercise types
        - 10 items each
        - Based on grammar analysis
        - Context must match essay topic

        Grammar Analysis:
        {grammar_analysis}

        Essay:
        {essay_content}
        """
        return self._get_completion(prompt)

    def generate_graph_description_exercises(self, graph_analysis):
        """Ask the model for graph-description exercises.

        Args:
            graph_analysis: Text of a previous graph analysis.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Create Task 1 exercises:

        1. Write an overview
        2. Describe a trend
        3. Compare two data points
        4. Select key features

        Graph Analysis:
        {graph_analysis}
        """
        return self._get_completion(prompt)

    def generate_vocabulary_exercises(self, vocab_analysis):
        """Ask the model for Task 1 vocabulary exercises.

        Args:
            vocab_analysis: Text of a previous vocabulary analysis.

        Returns:
            The model's reply text.
        """
        prompt = f"""
        Create Task 1 vocabulary exercises:

        Include:
        - Trend verbs
        - Comparison vocabulary
        - Chart-specific words

        Vocabulary Analysis:
        {vocab_analysis}
        """
        return self._get_completion(prompt)

    def _get_completion(self, prompt):
        """Send ``prompt`` as one user message and return the reply text.

        Args:
            prompt: The full prompt string.

        Returns:
            The ``content`` attribute of the first choice's message.
        """
        resp = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
        )
        # The SDK returns a message object, not a dict: subscripting it with
        # ["content"] raises TypeError, so read the attribute instead.
        return resp.choices[0].message.content
875
+
876
 
877
 
878
# ================================================================
# =============== T1 & T2 RESPONDER ============================
# ================================================================
def llm_responder_t1(image_path, question, written_response):
    """Run the full Task 1 pipeline for one graph image and one response.

    Args:
        image_path: Filesystem path of the graph/chart image.
        question: The Task 1 question.
        written_response: The candidate's written response.

    Returns:
        A 7-tuple of model reply texts: ``(evaluation, grammar, vocab,
        cohesion, task_ach, grammar_ex, vocab_ex)``.
    """
    evaluator = IELTSTask1Evaluator()
    generator = IELTSTask1ExerciseGenerator()

    # The graph analysis comes first because later prompts embed it.
    graph_notes = evaluator.analyze_graph(image_path, question)

    scores = evaluator.evaluate_task1_response(question, graph_notes, written_response)
    grammar_report = evaluator.analyze_grammar(written_response)
    vocab_report = evaluator.analyze_vocabulary(written_response)
    cohesion_report = evaluator.analyze_cohesion_coherence(written_response, graph_notes)
    task_report = evaluator.analyze_task_achievement(question, graph_notes, written_response)

    # Exercises are derived from the grammar and vocabulary reports above.
    grammar_drills = generator.generate_grammar_exercises(grammar_report, written_response)
    vocab_drills = generator.generate_vocabulary_exercises(vocab_report)

    return (
        scores,
        grammar_report,
        vocab_report,
        cohesion_report,
        task_report,
        grammar_drills,
        vocab_drills,
    )


def llm_responder_t2(question, written_response):
    """Run the full Task 2 pipeline for one essay.

    Args:
        question: The Task 2 essay question.
        written_response: The candidate's essay text.

    Returns:
        A 7-tuple of model reply texts: ``(evaluation, grammar, vocab,
        cohesion, task_ach, grammar_ex, vocab_ex)``.
    """
    evaluator = IELTSTask2Evaluator()
    generator = IELTSTask2ExerciseGenerator()

    scores = evaluator.evaluate_essay(question, written_response)
    grammar_report = evaluator.analyze_grammar(written_response)
    vocab_report = evaluator.analyze_vocabulary(written_response)
    cohesion_report = evaluator.analyze_cohesion_coherence(written_response)
    task_report = evaluator.analyze_task_achievement(question, written_response)

    # Exercises are derived from the grammar and vocabulary reports above.
    grammar_drills = generator.generate_grammar_exercises(grammar_report, written_response)
    vocab_drills = generator.generate_vocabulary_exercises(vocab_report)

    return (
        scores,
        grammar_report,
        vocab_report,
        cohesion_report,
        task_report,
        grammar_drills,
        vocab_drills,
    )