Update app.py
Browse files
app.py
CHANGED
|
@@ -155,14 +155,10 @@ import numpy as np
|
|
| 155 |
import os
|
| 156 |
from pathlib import Path
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
# warnings.filterwarnings("ignore")
|
| 161 |
-
|
| 162 |
-
app = Flask(__name__)
|
| 163 |
|
| 164 |
# Configure cache directories
|
| 165 |
-
cache_base = os.getenv('XDG_CACHE_HOME', '/
|
| 166 |
huggingface_cache = os.path.join(cache_base, 'huggingface')
|
| 167 |
languagetool_cache = os.path.join(cache_base, 'languagetool')
|
| 168 |
|
|
@@ -170,44 +166,19 @@ languagetool_cache = os.path.join(cache_base, 'languagetool')
|
|
| 170 |
Path(huggingface_cache).mkdir(parents=True, exist_ok=True)
|
| 171 |
Path(languagetool_cache).mkdir(parents=True, exist_ok=True)
|
| 172 |
|
| 173 |
-
# Initialize LanguageTool
|
| 174 |
-
# try:
|
| 175 |
-
# grammar_tool = LanguageTool(
|
| 176 |
-
# 'en-US',
|
| 177 |
-
# config={
|
| 178 |
-
# 'cacheDir': os.getenv('LT_CACHE', '/tmp/languagetool')
|
| 179 |
-
# },
|
| 180 |
-
# remote_server='https://api.languagetool.org' # Remote server as separate parameter
|
| 181 |
-
# )
|
| 182 |
-
# print("LanguageTool initialized successfully")
|
| 183 |
-
# except Exception as e:
|
| 184 |
-
# print(f"Error initializing LanguageTool: {e}")
|
| 185 |
-
# grammar_tool = None
|
| 186 |
-
# Configure LanguageTool cache
|
| 187 |
-
lt_cache = os.getenv('LT_CACHE', '/app/cache/languagetool')
|
| 188 |
-
Path(lt_cache).mkdir(parents=True, exist_ok=True)
|
| 189 |
-
|
| 190 |
try:
|
| 191 |
-
# Option 1: Force remote server (recommended)
|
| 192 |
grammar_tool = LanguageTool(
|
| 193 |
'en-US',
|
| 194 |
remote_server='https://api.languagetool.org'
|
| 195 |
)
|
| 196 |
-
|
| 197 |
-
# Option 2: Local server with explicit cache (if really needed)
|
| 198 |
-
# grammar_tool = LanguageTool(
|
| 199 |
-
# 'en-US',
|
| 200 |
-
# config={
|
| 201 |
-
# 'cacheDir': lt_cache,
|
| 202 |
-
# 'server': 'https://api.languagetool.org'
|
| 203 |
-
# }
|
| 204 |
-
# )
|
| 205 |
-
|
| 206 |
print("LanguageTool initialized successfully")
|
| 207 |
except Exception as e:
|
| 208 |
print(f"Error initializing LanguageTool: {e}")
|
| 209 |
grammar_tool = None
|
| 210 |
|
|
|
|
|
|
|
| 211 |
|
| 212 |
# Load Hugging Face models
|
| 213 |
MODEL_NAME = "Hak978/aes-bert-models"
|
|
@@ -234,92 +205,193 @@ except Exception as e:
|
|
| 234 |
print(f"Error loading models: {e}")
|
| 235 |
model_website1 = model_website2 = tokenizer = None
|
| 236 |
|
| 237 |
-
def
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
return
|
| 249 |
-
|
| 250 |
-
def
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
'
|
| 255 |
-
'
|
| 256 |
-
'
|
|
|
|
| 257 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
-
def
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
|
| 264 |
-
def
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
-
def
|
| 268 |
-
|
| 269 |
-
|
| 270 |
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
predictions = outputs.logits
|
| 275 |
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
-
@app.route('/analyze', methods=['POST'])
|
| 285 |
-
def analyze():
|
| 286 |
if request.method == 'POST':
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
|
| 289 |
-
#
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
grammar_errors = check_grammar(essay_text)
|
| 298 |
-
|
| 299 |
-
# Model predictions
|
| 300 |
-
score1 = predict_score(essay_text, model_website1, tokenizer_website1)
|
| 301 |
-
score2 = predict_score(essay_text, model_website2, tokenizer_website2)
|
| 302 |
-
|
| 303 |
-
# Calculate average score
|
| 304 |
-
average_score = (score1 + score2) / 2
|
| 305 |
-
|
| 306 |
-
# Prepare feedback
|
| 307 |
-
feedback = {
|
| 308 |
-
'word_count': word_count,
|
| 309 |
-
'avg_sentence_length': round(sentence_stats['average'], 2),
|
| 310 |
-
'min_sentence_length': int(sentence_stats['min']),
|
| 311 |
-
'max_sentence_length': int(sentence_stats['max']),
|
| 312 |
-
'vocabulary_diversity': round(vocabulary_diversity * 100, 2),
|
| 313 |
-
'punctuation_count': punctuation_count,
|
| 314 |
-
'spelling_errors': spelling_errors,
|
| 315 |
-
'grammar_errors': grammar_errors,
|
| 316 |
-
'score1': round(score1, 2),
|
| 317 |
-
'score2': round(score2, 2),
|
| 318 |
-
'average_score': round(average_score, 2)
|
| 319 |
-
}
|
| 320 |
-
|
| 321 |
-
return render_template('result.html', feedback=feedback)
|
| 322 |
|
| 323 |
if __name__ == '__main__':
|
| 324 |
-
port = int(os.environ.get('PORT', 7860))
|
| 325 |
app.run(host='0.0.0.0', port=port)
|
|
|
|
| 155 |
import os
|
| 156 |
from pathlib import Path
|
| 157 |
|
| 158 |
+
# Flask app; templates are served from the app's own directory instead of ./templates
app = Flask(__name__, template_folder='.')

# Configure cache directories
# Fall back to /tmp/cache when XDG_CACHE_HOME is unset (e.g. read-only container FS)
cache_base = os.getenv('XDG_CACHE_HOME', '/tmp/cache')
huggingface_cache = os.path.join(cache_base, 'huggingface')
languagetool_cache = os.path.join(cache_base, 'languagetool')

# Create both cache dirs up front so downstream libraries can write to them
Path(huggingface_cache).mkdir(parents=True, exist_ok=True)
Path(languagetool_cache).mkdir(parents=True, exist_ok=True)
|
| 168 |
|
| 169 |
+
# Initialize LanguageTool
try:
    # Use the public remote API server so no local Java LanguageTool install is needed
    grammar_tool = LanguageTool(
        'en-US',
        remote_server='https://api.languagetool.org'
    )
    print("LanguageTool initialized successfully")
except Exception as e:
    # Grammar checking is optional — all callers must handle grammar_tool being None
    print(f"Error initializing LanguageTool: {e}")
    grammar_tool = None
|
| 179 |
|
| 180 |
+
# Initialize SpellChecker
# Used by the rule-based spelling / diversity scorers below
spell = SpellChecker()
|
| 182 |
|
| 183 |
# Load Hugging Face models
|
| 184 |
MODEL_NAME = "Hak978/aes-bert-models"
|
|
|
|
| 205 |
print(f"Error loading models: {e}")
|
| 206 |
model_website1 = model_website2 = tokenizer = None
|
| 207 |
|
| 208 |
+
def tokenize_text(text, tokenizer):
    """Encode *text* into fixed-length model inputs.

    Returns a ``(input_ids, attention_mask)`` pair of PyTorch tensors,
    padded/truncated to exactly 512 tokens with special tokens added.
    """
    encoding_options = dict(
        add_special_tokens=True,
        max_length=512,
        truncation=True,
        return_token_type_ids=False,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )
    encoded = tokenizer.encode_plus(text, **encoding_options)
    return encoded['input_ids'], encoded['attention_mask']
|
| 220 |
+
|
| 221 |
+
def normalize_bert_score(raw_score, category, essay):
    """Map a sigmoid-scaled model output onto the category's rubric band.

    *raw_score* (expected in [0, 1]) is stretched onto the category's
    [min, max] range; for the error-sensitive categories the result is
    reduced by a penalty proportional to the grammar + spelling error
    density of *essay*. The final value is clamped to the band and
    rounded to one decimal place.
    """
    params = {
        'grammar': {'min': 1, 'max': 8, 'threshold': 0.8},
        'lexical': {'min': 1, 'max': 8, 'threshold': 0.8},
        'global_organization': {'min': 3, 'max': 8, 'threshold': 0.6},
        'local_organization': {'min': 3, 'max': 8, 'threshold': 0.6},
        'supporting_ideas': {'min': 3, 'max': 8, 'threshold': 0.6},
        'holistic': {'min': 1, 'max': 5, 'threshold': 0.9}
    }
    band = params[category]
    lo, hi = band['min'], band['max']

    words = essay.split()
    grammar_hits = len(grammar_tool.check(essay)) if grammar_tool else 0
    spelling_hits = len(spell.unknown(words)) if spell else 0
    # An empty essay is treated as maximally error-dense.
    density = (grammar_hits + spelling_hits) / len(words) if words else 1
    penalty = density * 7

    scaled = lo + raw_score * (hi - lo)
    if category in ('grammar', 'lexical', 'holistic'):
        scaled = max(lo, scaled - penalty)

    return round(min(hi, max(lo, scaled)), 1)
|
| 245 |
|
| 246 |
+
def get_predictions_website1(essays):
    """Score each essay in *essays* with the website-1 BERT model.

    Returns one list of six normalized rubric scores per essay, in the
    order [grammar, lexical, global_organization, local_organization,
    supporting_ideas, holistic]. Returns [] when the model or tokenizer
    failed to load at startup.
    """
    if not model_website1 or not tokenizer:
        return []

    input_ids = []
    attention_masks = []
    for essay in essays:
        ids, mask = tokenize_text(essay, tokenizer)
        input_ids.append(ids)
        attention_masks.append(mask)

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)

    model_website1.eval()
    with torch.no_grad():
        outputs = model_website1(input_ids, attention_mask=attention_masks)
        raw_predictions = outputs.logits.cpu().numpy()

    categories = ['grammar', 'lexical', 'global_organization',
                  'local_organization', 'supporting_ideas', 'holistic']

    normalized_predictions = []
    # BUG FIX: pair each prediction row with ITS OWN essay text. The old
    # code normalized every row against essays[0], so the grammar/spelling
    # error penalties were wrong for every essay but the first in a batch.
    for essay, raw_pred in zip(essays, raw_predictions):
        raw_scores = 1 / (1 + np.exp(-raw_pred))  # sigmoid onto [0, 1]
        normalized_predictions.append([
            normalize_bert_score(score, category, essay)
            for score, category in zip(raw_scores, categories)
        ])

    return normalized_predictions
|
| 279 |
|
| 280 |
+
def calculate_grammar_score(essay):
    """Grade *essay* on a 2–10 scale from LanguageTool findings.

    Error categories carry different weights (grammar mistakes cost more
    than typography), the weighted total is converted to a per-100-words
    density, and any category occurring more than twice adds a repetition
    penalty. Returns None when LanguageTool is unavailable.
    """
    if not grammar_tool:
        return None

    matches = grammar_tool.check(essay)
    weights = {
        'SPELLING': 2.0,
        'GRAMMAR': 2.5,
        'PUNCTUATION': 1.5,
        'TYPOGRAPHY': 1.0,
    }
    # Unknown categories get a middle-of-the-road weight of 1.5
    total_weight = sum(weights.get(m.category, 1.5) for m in matches)

    word_total = len(essay.split())
    density = (total_weight / word_total) * 100 if word_total > 0 else 100

    by_category = Counter(m.category for m in matches)
    repetition_penalty = sum(n * 0.3 for n in by_category.values() if n > 2)

    score = (10 - density * 0.7) - repetition_penalty
    return round(max(2, min(10, score)), 1)
|
| 306 |
|
| 307 |
+
def calculate_spelling_score(essay):
    """Grade spelling on a 2–10 scale.

    Tokens are stripped of surrounding punctuation and checked against
    the spell checker; the misspelling rate (plus a surcharge once more
    than five words are wrong) is subtracted from a perfect 10. An empty
    essay counts as a 100% error rate.
    """
    tokens = [t.strip('.,!?()[]{}":;') for t in essay.split()]
    unknown = spell.unknown(tokens) if spell else []

    n_tokens = len(tokens)
    n_errors = len(unknown)
    rate = n_errors / n_tokens if n_tokens > 0 else 1

    penalty = rate * 20
    if n_errors > 5:
        penalty += (n_errors - 5) * 0.5

    return round(max(2, min(10, 10 - penalty)), 1)
|
| 321 |
|
| 322 |
+
def calculate_word_diversity(essay):
    """Grade vocabulary diversity on a 5–10 scale.

    The type/token ratio of content (non-stop) words sets a base score;
    heavy word repetition and misspellings subtract from it. Essays with
    no usable words receive the neutral default 7.0.
    """
    cleaned = essay.lower().translate(str.maketrans('', '', string.punctuation))
    tokens = cleaned.split()
    if not tokens:
        return 7.0

    unknown = spell.unknown(tokens) if spell else []
    misspelling_penalty = len(unknown) / len(tokens) * 5

    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at',
                  'to', 'for', 'of', 'with', 'by'}
    content = [t for t in tokens if t not in stop_words]
    if not content:
        return 7.0

    distinct = len(set(content))
    ratio = distinct / len(content)
    # Words used more than twice count as "overused"
    overused = sum(1 for n in Counter(content).values() if n > 2)
    repetition_penalty = min(1.5, overused / distinct)

    score = 8 + (2 * ratio) - repetition_penalty - misspelling_penalty
    return round(max(5, min(10, score)), 1)
|
| 348 |
+
|
| 349 |
+
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the scoring page; on POST, grade the submitted essay.

    Combines BERT-based rubric scores (website 1) with rule-based
    grammar, spelling, and vocabulary scores (website 2), plus a blended
    overall quality score. All scores stay None on GET or when a scorer
    is unavailable, so the template can render placeholders.
    """
    context = {
        'essay': '',
        'grammar_score': None,
        'lexical_score': None,
        'global_organization_score': None,
        'local_organization_score': None,
        'supporting_ideas_score': None,
        'holistic_score': None,
        'grammar_score2': None,
        'spelling_score': None,
        'word_diversity_score': None,
        'essay_quality_score': None
    }

    if request.method == 'POST':
        # .get avoids Flask's automatic 400 (KeyError) when the form field is missing
        essay = request.form.get('essay', '')
        context['essay'] = essay

        # Website 1 predictions (BERT rubric scores)
        predictions_website1 = get_predictions_website1([essay])
        if predictions_website1 and len(predictions_website1[0]) >= 6:
            context.update({
                'grammar_score': predictions_website1[0][0],
                'lexical_score': predictions_website1[0][1],
                'global_organization_score': predictions_website1[0][2],
                'local_organization_score': predictions_website1[0][3],
                'supporting_ideas_score': predictions_website1[0][4],
                # Holistic rubric tops out at 5.0
                'holistic_score': min(5.0, predictions_website1[0][5])
            })

        # Website 2 predictions (rule-based scores)
        context['grammar_score2'] = calculate_grammar_score(essay)
        context['spelling_score'] = calculate_spelling_score(essay)
        context['word_diversity_score'] = calculate_word_diversity(essay)

        # Calculate overall quality score (holistic weighted double).
        # Explicit None checks so a legitimate low score is never skipped.
        if context['holistic_score'] is not None and context['grammar_score2'] is not None:
            context['essay_quality_score'] = round(
                (context['holistic_score'] * 2 + context['grammar_score2']) / 3,
                1
            )

    return render_template('index.html', **context)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
if __name__ == '__main__':
    # Respect the platform-supplied PORT; 7860 is the Hugging Face Spaces default
    port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=port)
|