Hak978 committed on
Commit
66c7c07
·
verified ·
1 Parent(s): a16242f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -110
app.py CHANGED
@@ -155,14 +155,10 @@ import numpy as np
155
  import os
156
  from pathlib import Path
157
 
158
-
159
-
160
- # warnings.filterwarnings("ignore")
161
-
162
- app = Flask(__name__)
163
 
164
  # Configure cache directories
165
- cache_base = os.getenv('XDG_CACHE_HOME', '/app/cache')
166
  huggingface_cache = os.path.join(cache_base, 'huggingface')
167
  languagetool_cache = os.path.join(cache_base, 'languagetool')
168
 
@@ -170,44 +166,19 @@ languagetool_cache = os.path.join(cache_base, 'languagetool')
170
  Path(huggingface_cache).mkdir(parents=True, exist_ok=True)
171
  Path(languagetool_cache).mkdir(parents=True, exist_ok=True)
172
 
173
- # Initialize LanguageTool with explicit cache
174
- # try:
175
- # grammar_tool = LanguageTool(
176
- # 'en-US',
177
- # config={
178
- # 'cacheDir': os.getenv('LT_CACHE', '/tmp/languagetool')
179
- # },
180
- # remote_server='https://api.languagetool.org' # Remote server as separate parameter
181
- # )
182
- # print("LanguageTool initialized successfully")
183
- # except Exception as e:
184
- # print(f"Error initializing LanguageTool: {e}")
185
- # grammar_tool = None
186
- # Configure LanguageTool cache
187
- lt_cache = os.getenv('LT_CACHE', '/app/cache/languagetool')
188
- Path(lt_cache).mkdir(parents=True, exist_ok=True)
189
-
190
  try:
191
- # Option 1: Force remote server (recommended)
192
  grammar_tool = LanguageTool(
193
  'en-US',
194
  remote_server='https://api.languagetool.org'
195
  )
196
-
197
- # Option 2: Local server with explicit cache (if really needed)
198
- # grammar_tool = LanguageTool(
199
- # 'en-US',
200
- # config={
201
- # 'cacheDir': lt_cache,
202
- # 'server': 'https://api.languagetool.org'
203
- # }
204
- # )
205
-
206
  print("LanguageTool initialized successfully")
207
  except Exception as e:
208
  print(f"Error initializing LanguageTool: {e}")
209
  grammar_tool = None
210
 
 
 
211
 
212
  # Load Hugging Face models
213
  MODEL_NAME = "Hak978/aes-bert-models"
@@ -234,92 +205,193 @@ except Exception as e:
234
  print(f"Error loading models: {e}")
235
  model_website1 = model_website2 = tokenizer = None
236
 
237
- def check_spelling(text):
238
- words = text.split()
239
- misspelled = spell.unknown(words)
240
- return list(misspelled)
241
-
242
- def check_grammar(text):
243
- matches = grammar_tool.check(text)
244
- return [{'message': match.message, 'replacements': match.replacements} for match in matches]
245
-
246
- def count_words(text):
247
- words = text.split()
248
- return len(words)
249
-
250
- def calculate_sentence_lengths(text):
251
- sentences = text.split('.')
252
- lengths = [len(sentence.split()) for sentence in sentences if sentence.strip()]
253
- return {
254
- 'average': np.mean(lengths) if lengths else 0,
255
- 'min': min(lengths) if lengths else 0,
256
- 'max': max(lengths) if lengths else 0
 
257
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
- def calculate_vocabulary_diversity(text):
260
- words = text.lower().split()
261
- unique_words = set(words)
262
- return len(unique_words) / len(words) if words else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
 
264
- def count_punctuation(text):
265
- return sum(1 for char in text if char in string.punctuation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
- def predict_score(text, model, tokenizer):
268
- # Tokenize and prepare input
269
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
270
 
271
- # Get model prediction
272
- with torch.no_grad():
273
- outputs = model(**inputs)
274
- predictions = outputs.logits
275
 
276
- # Convert prediction to score (assuming regression model)
277
- predicted_score = predictions.item()
278
- return predicted_score
 
 
 
279
 
280
- @app.route('/')
281
- def home():
282
- return render_template('index.html')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
- @app.route('/analyze', methods=['POST'])
285
- def analyze():
286
  if request.method == 'POST':
287
- essay_text = request.form['essay']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
- # Basic statistics
290
- word_count = count_words(essay_text)
291
- sentence_stats = calculate_sentence_lengths(essay_text)
292
- vocabulary_diversity = calculate_vocabulary_diversity(essay_text)
293
- punctuation_count = count_punctuation(essay_text)
294
-
295
- # Spelling and grammar checks
296
- spelling_errors = check_spelling(essay_text)
297
- grammar_errors = check_grammar(essay_text)
298
-
299
- # Model predictions
300
- score1 = predict_score(essay_text, model_website1, tokenizer_website1)
301
- score2 = predict_score(essay_text, model_website2, tokenizer_website2)
302
-
303
- # Calculate average score
304
- average_score = (score1 + score2) / 2
305
-
306
- # Prepare feedback
307
- feedback = {
308
- 'word_count': word_count,
309
- 'avg_sentence_length': round(sentence_stats['average'], 2),
310
- 'min_sentence_length': int(sentence_stats['min']),
311
- 'max_sentence_length': int(sentence_stats['max']),
312
- 'vocabulary_diversity': round(vocabulary_diversity * 100, 2),
313
- 'punctuation_count': punctuation_count,
314
- 'spelling_errors': spelling_errors,
315
- 'grammar_errors': grammar_errors,
316
- 'score1': round(score1, 2),
317
- 'score2': round(score2, 2),
318
- 'average_score': round(average_score, 2)
319
- }
320
-
321
- return render_template('result.html', feedback=feedback)
322
 
323
  if __name__ == '__main__':
324
- port = int(os.environ.get('PORT', 7860)) # Changed from 5000 to 7860 for Spaces
325
  app.run(host='0.0.0.0', port=port)
 
155
  import os
156
  from pathlib import Path
157
 
158
# Flask application; templates are served from the project root rather
# than a templates/ subdirectory.
app = Flask(__name__, template_folder='.')

# Configure cache directories (the host may override via XDG_CACHE_HOME).
cache_base = os.getenv('XDG_CACHE_HOME', '/tmp/cache')
huggingface_cache = os.path.join(cache_base, 'huggingface')
languagetool_cache = os.path.join(cache_base, 'languagetool')
for cache_dir in (huggingface_cache, languagetool_cache):
    Path(cache_dir).mkdir(parents=True, exist_ok=True)

# Initialize LanguageTool against the public remote server (avoids
# spawning a local Java backend). Fall back to None so the app still
# boots when the service is unreachable; callers check for None.
try:
    grammar_tool = LanguageTool(
        'en-US',
        remote_server='https://api.languagetool.org'
    )
    print("LanguageTool initialized successfully")
except Exception as e:
    print(f"Error initializing LanguageTool: {e}")
    grammar_tool = None

# Initialize SpellChecker
spell = SpellChecker()
182
 
183
  # Load Hugging Face models
184
  MODEL_NAME = "Hak978/aes-bert-models"
 
205
  print(f"Error loading models: {e}")
206
  model_website1 = model_website2 = tokenizer = None
207
 
208
def tokenize_text(text, tokenizer):
    """Encode *text* for a BERT-style model.

    Returns a ``(input_ids, attention_mask)`` pair of PyTorch tensors,
    truncated/padded to a fixed length of 512 tokens.
    """
    encode_kwargs = dict(
        add_special_tokens=True,
        max_length=512,
        truncation=True,
        return_token_type_ids=False,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )
    encoded = tokenizer.encode_plus(text, **encode_kwargs)
    return encoded['input_ids'], encoded['attention_mask']
220
+
221
# Rubric ranges per scoring category. 'threshold' is kept for parity with
# the original tuning but is not used by the normalization below.
_SCORE_PARAMS = {
    'grammar': {'min': 1, 'max': 8, 'threshold': 0.8},
    'lexical': {'min': 1, 'max': 8, 'threshold': 0.8},
    'global_organization': {'min': 3, 'max': 8, 'threshold': 0.6},
    'local_organization': {'min': 3, 'max': 8, 'threshold': 0.6},
    'supporting_ideas': {'min': 3, 'max': 8, 'threshold': 0.6},
    'holistic': {'min': 1, 'max': 5, 'threshold': 0.9}
}

# Cache of per-essay error densities. grammar_tool.check() goes to a remote
# LanguageTool server, and normalize_bert_score is called once per category
# for the same essay — without this cache that is six identical remote calls.
_ERROR_DENSITY_CACHE = {}


def _essay_error_density(essay):
    """Return (grammar errors + spelling errors) / word count, cached per essay.

    An empty essay yields a density of 1 (maximum penalty), matching the
    original behaviour.
    """
    if essay not in _ERROR_DENSITY_CACHE:
        words = essay.split()
        grammar_errors = len(grammar_tool.check(essay)) if grammar_tool else 0
        spelling_errors = len(spell.unknown(words)) if spell else 0
        density = (grammar_errors + spelling_errors) / len(words) if words else 1
        if len(_ERROR_DENSITY_CACHE) > 128:  # keep the cache bounded
            _ERROR_DENSITY_CACHE.clear()
        _ERROR_DENSITY_CACHE[essay] = density
    return _ERROR_DENSITY_CACHE[essay]


def normalize_bert_score(raw_score, category, essay):
    """Map a sigmoid model output in [0, 1] onto the rubric scale for *category*.

    raw_score -- sigmoid-activated model output for this category.
    category  -- one of the keys of _SCORE_PARAMS (KeyError otherwise).
    essay     -- essay text; its error density penalizes the
                 grammar/lexical/holistic categories.

    Returns the score rounded to one decimal, clamped to the category range.
    """
    bounds = _SCORE_PARAMS[category]
    penalty = _essay_error_density(essay) * 7  # empirically chosen weight

    base_score = bounds['min'] + (raw_score * (bounds['max'] - bounds['min']))
    # Only the error-sensitive categories are penalized for errors.
    if category in ('grammar', 'lexical', 'holistic'):
        base_score = max(bounds['min'], base_score - penalty)

    return round(max(bounds['min'], min(bounds['max'], base_score)), 1)
245
 
246
def get_predictions_website1(essays):
    """Score each essay in *essays* across the six rubric categories.

    Returns a list with one entry per essay, each a list of six normalized
    scores in the order: grammar, lexical, global_organization,
    local_organization, supporting_ideas, holistic. Returns [] when the
    model or tokenizer failed to load at startup.
    """
    if not model_website1 or not tokenizer:
        return []

    input_ids = []
    attention_masks = []
    for essay in essays:
        ids, mask = tokenize_text(essay, tokenizer)
        input_ids.append(ids)
        attention_masks.append(mask)

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)

    model_website1.eval()
    with torch.no_grad():
        outputs = model_website1(input_ids, attention_mask=attention_masks)
        raw_predictions = outputs.logits.cpu().numpy()

    categories = ['grammar', 'lexical', 'global_organization',
                  'local_organization', 'supporting_ideas', 'holistic']

    normalized_predictions = []
    # BUG FIX: the original normalized every prediction row against
    # essays[0]; pair each prediction with its own essay instead.
    for raw_pred, essay in zip(raw_predictions, essays):
        raw_scores = 1 / (1 + np.exp(-raw_pred))  # sigmoid activation
        normalized_predictions.append([
            normalize_bert_score(score, category, essay)
            for score, category in zip(raw_scores, categories)
        ])

    return normalized_predictions
279
 
280
def calculate_grammar_score(essay):
    """Heuristic grammar score on a 2-10 scale.

    Returns None when the LanguageTool backend is unavailable.
    """
    if not grammar_tool:
        return None

    matches = grammar_tool.check(essay)

    # Weight error categories by perceived severity; unknown categories
    # get a middle weight of 1.5.
    severity = {
        'SPELLING': 2.0,
        'GRAMMAR': 2.5,
        'PUNCTUATION': 1.5,
        'TYPOGRAPHY': 1.0
    }
    weighted_errors = sum(severity.get(m.category, 1.5) for m in matches)

    word_total = len(essay.split())
    if word_total > 0:
        error_density = (weighted_errors / word_total) * 100
    else:
        error_density = 100  # empty essay: treat as maximal density

    # Extra penalty for error categories that recur more than twice.
    category_counts = Counter(m.category for m in matches)
    repeat_penalty = sum(n * 0.3 for n in category_counts.values() if n > 2)

    final_score = (10 - error_density * 0.7) - repeat_penalty
    return round(max(2, min(10, final_score)), 1)
306
 
307
def calculate_spelling_score(essay):
    """Heuristic spelling score on a 2-10 scale.

    Tokens are stripped of surrounding punctuation before being checked.
    Tokens that were punctuation-only are dropped — the original kept the
    resulting empty strings, which were counted as words and could be
    flagged unknown, inflating the error rate. An empty essay scores 2
    (error_rate defaults to 1).
    """
    words = [w for w in (token.strip('.,!?()[]{}":;') for token in essay.split()) if w]
    misspelled = spell.unknown(words) if spell else []

    total_words = len(words)
    error_count = len(misspelled)
    error_rate = error_count / total_words if total_words > 0 else 1

    # Base penalty proportional to the error rate, plus 0.5 for every
    # error beyond the first five.
    error_penalty = error_rate * 20
    if error_count > 5:
        error_penalty += (error_count - 5) * 0.5

    spelling_score = 10 - error_penalty
    return round(max(2, min(10, spelling_score)), 1)
321
 
322
def calculate_word_diversity(essay):
    """Score vocabulary variety on a 5-10 scale.

    Returns a neutral 7.0 for essays with no usable words.
    """
    strip_punct = str.maketrans('', '', string.punctuation)
    tokens = essay.lower().translate(strip_punct).split()
    if not tokens:
        return 7.0

    # Misspellings reduce the diversity score proportionally.
    misspelled = spell.unknown(tokens) if spell else []
    spelling_penalty = len(misspelled) / len(tokens) * 5

    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at',
                  'to', 'for', 'of', 'with', 'by'}
    content = [t for t in tokens if t not in stop_words]
    if not content:
        return 7.0

    unique = len(set(content))
    frequencies = Counter(content)
    overused = sum(1 for n in frequencies.values() if n > 2)

    diversity_ratio = unique / len(content)
    repetition_penalty = min(1.5, overused / unique)

    score = 8 + (2 * diversity_ratio) - repetition_penalty - spelling_penalty
    return round(max(5, min(10, score)), 1)
348
+
349
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the scoring page; on POST, score the submitted essay.

    The template receives the original essay plus every score. Any score
    that could not be computed (model not loaded, LanguageTool down)
    stays None.
    """
    context = {
        'essay': '',
        'grammar_score': None,
        'lexical_score': None,
        'global_organization_score': None,
        'local_organization_score': None,
        'supporting_ideas_score': None,
        'holistic_score': None,
        'grammar_score2': None,
        'spelling_score': None,
        'word_diversity_score': None,
        'essay_quality_score': None
    }

    if request.method == 'POST':
        # .get avoids a KeyError/400 when the form field is absent.
        essay = request.form.get('essay', '')
        context['essay'] = essay

        # Website 1: BERT model predictions across six rubric categories.
        predictions_website1 = get_predictions_website1([essay])
        if predictions_website1 and len(predictions_website1[0]) >= 6:
            scores = predictions_website1[0]
            context.update({
                'grammar_score': scores[0],
                'lexical_score': scores[1],
                'global_organization_score': scores[2],
                'local_organization_score': scores[3],
                'supporting_ideas_score': scores[4],
                'holistic_score': min(5.0, scores[5])  # holistic capped at 5
            })

        # Website 2: rule-based heuristic scores.
        context['grammar_score2'] = calculate_grammar_score(essay)
        context['spelling_score'] = calculate_spelling_score(essay)
        context['word_diversity_score'] = calculate_word_diversity(essay)

        # Blend model holistic and heuristic grammar 2:1 into one figure;
        # skipped when either piece is missing (None/0 is falsy).
        if context['holistic_score'] and context['grammar_score2']:
            context['essay_quality_score'] = round(
                (context['holistic_score'] * 2 + context['grammar_score2']) / 3,
                1
            )

    return render_template('index.html', **context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
if __name__ == '__main__':
    # Hugging Face Spaces routes traffic to port 7860 by default.
    port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=port)