| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
|
|
| from flask import Flask, render_template, request |
| from transformers import BertForSequenceClassification, BertTokenizer |
| from language_tool_python import LanguageTool |
| from spellchecker import SpellChecker |
| from collections import Counter |
| import string |
| import torch |
| import numpy as np |
| import os |
| from pathlib import Path |
|
|
app = Flask(__name__, template_folder='.')

# Writable cache locations. XDG_CACHE_HOME is honored when set (e.g. in
# containerized deployments where only /tmp is writable), /tmp/cache otherwise.
cache_base = os.getenv('XDG_CACHE_HOME', '/tmp/cache')
huggingface_cache = os.path.join(cache_base, 'huggingface')
languagetool_cache = os.path.join(cache_base, 'languagetool')

# Ensure the cache directories exist before any library tries to use them.
Path(huggingface_cache).mkdir(parents=True, exist_ok=True)
Path(languagetool_cache).mkdir(parents=True, exist_ok=True)

# Grammar checker backed by the public LanguageTool HTTP API (no local Java
# server needed). On failure the app degrades gracefully: grammar_tool stays
# None and grammar-based checks are skipped by the scoring functions below.
try:
    grammar_tool = LanguageTool(
        'en-US',
        remote_server='https://api.languagetool.org'
    )
    print("LanguageTool initialized successfully")
except Exception as e:
    print(f"Error initializing LanguageTool: {e}")
    grammar_tool = None

# Offline spell checker used for spelling-based penalties.
spell = SpellChecker()

# Two fine-tuned BERT regression heads hosted in a single Hugging Face repo;
# each subfolder is a separate training run. The stock bert-base-uncased
# tokenizer is used for both. On any load failure all three handles are set
# to None and get_predictions_website1() returns an empty result.
MODEL_NAME = "Hak978/aes-bert-models"
try:
    model_website1 = BertForSequenceClassification.from_pretrained(
        MODEL_NAME,
        subfolder="essay_scoring_model_regression_20240228_123826",
        cache_dir=huggingface_cache
    )
    model_website2 = BertForSequenceClassification.from_pretrained(
        MODEL_NAME,
        subfolder="essay_scoring_model_regression_20240229_133324",
        cache_dir=huggingface_cache
    )

    tokenizer = BertTokenizer.from_pretrained(
        'bert-base-uncased',
        cache_dir=huggingface_cache
    )
    print("Models loaded successfully")
except Exception as e:
    print(f"Error loading models: {e}")
    model_website1 = model_website2 = tokenizer = None
|
|
def tokenize_text(text, tokenizer):
    """Encode *text* for BERT inference.

    Pads/truncates to a fixed length of 512 tokens and returns the
    ``(input_ids, attention_mask)`` pair as PyTorch tensors.
    """
    encode_options = dict(
        add_special_tokens=True,
        max_length=512,
        truncation=True,
        return_token_type_ids=False,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )
    encoded = tokenizer.encode_plus(text, **encode_options)
    return encoded['input_ids'], encoded['attention_mask']
|
|
def normalize_bert_score(raw_score, category, essay):
    """Map a sigmoid-normalized model output onto a per-category rubric scale.

    *raw_score* (expected in [0, 1]) is linearly rescaled into the category's
    [min, max] range; accuracy-oriented categories (grammar, lexical,
    holistic) are additionally penalized by the density of grammar and
    spelling errors found in *essay*. Returns the clamped score rounded to
    one decimal place.
    """
    params = {
        'grammar': {'min': 1, 'max': 8, 'threshold': 0.8},
        'lexical': {'min': 1, 'max': 8, 'threshold': 0.8},
        'global_organization': {'min': 3, 'max': 8, 'threshold': 0.6},
        'local_organization': {'min': 3, 'max': 8, 'threshold': 0.6},
        'supporting_ideas': {'min': 3, 'max': 8, 'threshold': 0.6},
        'holistic': {'min': 1, 'max': 5, 'threshold': 0.9}
    }
    bounds = params[category]
    lo, hi = bounds['min'], bounds['max']

    # Count issues with whichever checkers are available at runtime.
    grammar_errors = len(grammar_tool.check(essay)) if grammar_tool else 0
    words = essay.split()
    misspellings = len(spell.unknown(words)) if spell else 0

    # An empty essay is treated as maximally error-dense.
    error_density = (grammar_errors + misspellings) / len(words) if words else 1
    penalty = error_density * 7

    scaled = lo + (raw_score * (hi - lo))
    if category in ('grammar', 'lexical', 'holistic'):
        scaled = max(lo, scaled - penalty)

    return round(max(lo, min(hi, scaled)), 1)
|
|
def get_predictions_website1(essays):
    """Score a batch of essays with the first BERT regression model.

    Each essay receives six rubric scores (grammar, lexical, global
    organization, local organization, supporting ideas, holistic): the
    model logits are squashed through a sigmoid and normalized per
    category via normalize_bert_score(). Returns a list with one
    six-element score list per essay, or [] when the model or tokenizer
    failed to load at startup.
    """
    # Explicit identity checks: the module sets these to None on load failure.
    if model_website1 is None or tokenizer is None:
        return []

    input_ids = []
    attention_masks = []
    for essay in essays:
        ids, mask = tokenize_text(essay, tokenizer)
        input_ids.append(ids)
        attention_masks.append(mask)

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)

    model_website1.eval()
    with torch.no_grad():
        outputs = model_website1(input_ids, attention_mask=attention_masks)
        raw_predictions = outputs.logits.cpu().numpy()

    categories = ['grammar', 'lexical', 'global_organization',
                  'local_organization', 'supporting_ideas', 'holistic']

    normalized_predictions = []
    # BUG FIX: the original normalized every prediction row against
    # essays[0], so in batches of more than one essay every essay was
    # penalized using the first essay's grammar/spelling errors. Pair each
    # prediction row with its own essay instead.
    for raw_pred, essay in zip(raw_predictions, essays):
        raw_scores = 1 / (1 + np.exp(-raw_pred))  # sigmoid -> [0, 1]
        normalized_predictions.append([
            normalize_bert_score(score, category, essay)
            for score, category in zip(raw_scores, categories)
        ])

    return normalized_predictions
|
|
def calculate_grammar_score(essay):
    """Score grammar on a 2-10 scale from LanguageTool matches.

    Errors are weighted by category, normalized per 100 words, and any
    error category occurring more than twice incurs an extra repetition
    penalty. Returns None when LanguageTool is unavailable.
    """
    if not grammar_tool:
        return None

    matches = grammar_tool.check(essay)
    error_weights = {
        'SPELLING': 2.0,
        'GRAMMAR': 2.5,
        'PUNCTUATION': 1.5,
        'TYPOGRAPHY': 1.0
    }

    # Sum category weights; unlisted categories default to 1.5.
    weighted_errors = sum(error_weights.get(m.category, 1.5) for m in matches)

    words = len(essay.split())
    error_density = (weighted_errors / words) * 100 if words > 0 else 100

    base_score = 10 - (error_density * 0.7)

    # Extra penalty for error categories that show up more than twice.
    error_types = Counter(m.category for m in matches)
    repeated_error_penalty = sum(n * 0.3 for n in error_types.values() if n > 2)

    final_score = base_score - repeated_error_penalty
    return round(max(2, min(10, final_score)), 1)
|
|
def calculate_spelling_score(essay):
    """Score spelling on a 2-10 scale.

    Tokens are stripped of surrounding punctuation and checked with the
    spell checker; the score drops with the misspelling rate, with an
    additional penalty once more than five words are misspelled.
    """
    punctuation = '.,!?()[]{}":;'
    words = [token.strip(punctuation) for token in essay.split()]
    misspelled = spell.unknown(words) if spell else []

    total_words = len(words)
    error_count = len(misspelled)
    # An empty essay counts as a 100% error rate.
    error_rate = error_count / total_words if total_words > 0 else 1

    penalty = error_rate * 20
    if error_count > 5:
        penalty += (error_count - 5) * 0.5

    return round(max(2, min(10, 10 - penalty)), 1)
|
|
def calculate_word_diversity(essay):
    """Score lexical diversity on a 5-10 scale.

    Lowercases and de-punctuates the essay, drops a small stop-word list,
    then combines the unique/total content-word ratio with penalties for
    heavily repeated words and misspellings. Degenerate inputs (no words,
    or only stop words) receive a neutral 7.0.
    """
    cleaned = essay.lower().translate(str.maketrans('', '', string.punctuation))
    words = cleaned.split()
    if not words:
        return 7.0

    misspelled = spell.unknown(words) if spell else []
    spelling_penalty = len(misspelled) / len(words) * 5

    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
    content_words = [w for w in words if w not in stop_words]
    if not content_words:
        return 7.0

    total_words = len(content_words)
    unique_words = len(set(content_words))
    word_freq = Counter(content_words)
    # Words used more than twice signal repetitive vocabulary.
    repeated_words = sum(1 for count in word_freq.values() if count > 2)

    diversity_ratio = unique_words / total_words
    repetition_penalty = min(1.5, repeated_words / unique_words)

    final_score = 8 + (2 * diversity_ratio) - repetition_penalty - spelling_penalty
    return round(max(5, min(10, final_score)), 1)
|
|
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the scoring page; on POST, score the submitted essay.

    Fills the template context with six BERT-based category scores plus
    heuristic grammar/spelling/diversity scores and a combined
    essay-quality score (holistic weighted twice against grammar).
    """
    context = {
        'essay': '',
        'grammar_score': None,
        'lexical_score': None,
        'global_organization_score': None,
        'local_organization_score': None,
        'supporting_ideas_score': None,
        'holistic_score': None,
        'grammar_score2': None,
        'spelling_score': None,
        'word_diversity_score': None,
        'essay_quality_score': None
    }

    if request.method == 'POST':
        # ROBUSTNESS FIX: .get() avoids Flask's 400 BadRequestKeyError
        # when the 'essay' field is absent from the POST body.
        essay = request.form.get('essay', '')
        context['essay'] = essay

        # BERT-based category scores ([] when models failed to load).
        predictions_website1 = get_predictions_website1([essay])
        if predictions_website1 and len(predictions_website1[0]) >= 6:
            scores = predictions_website1[0]
            context.update({
                'grammar_score': scores[0],
                'lexical_score': scores[1],
                'global_organization_score': scores[2],
                'local_organization_score': scores[3],
                'supporting_ideas_score': scores[4],
                # Holistic rubric tops out at 5.0.
                'holistic_score': min(5.0, scores[5])
            })

        # Heuristic scores, independent of the BERT models.
        context['grammar_score2'] = calculate_grammar_score(essay)
        context['spelling_score'] = calculate_spelling_score(essay)
        context['word_diversity_score'] = calculate_word_diversity(essay)

        # Explicit None checks instead of truthiness so a legitimately
        # low (but non-None) score can never be skipped.
        if context['holistic_score'] is not None and context['grammar_score2'] is not None:
            context['essay_quality_score'] = round(
                (context['holistic_score'] * 2 + context['grammar_score2']) / 3,
                1
            )

    return render_template('index.html', **context)
|
|
if __name__ == '__main__':
    # Honor the platform-provided PORT (e.g. a hosted container), else 7860.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)