Hak978 committed on
Commit
66c7c07
·
verified ·
1 Parent(s): a16242f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -110
app.py CHANGED
@@ -155,14 +155,10 @@ import numpy as np
155
  import os
156
  from pathlib import Path
157
 
158
-
159
-
160
- # warnings.filterwarnings("ignore")
161
-
162
- app = Flask(__name__)
163
 
164
  # Configure cache directories
165
- cache_base = os.getenv('XDG_CACHE_HOME', '/app/cache')
166
  huggingface_cache = os.path.join(cache_base, 'huggingface')
167
  languagetool_cache = os.path.join(cache_base, 'languagetool')
168
 
@@ -170,44 +166,19 @@ languagetool_cache = os.path.join(cache_base, 'languagetool')
170
  Path(huggingface_cache).mkdir(parents=True, exist_ok=True)
171
  Path(languagetool_cache).mkdir(parents=True, exist_ok=True)
172
 
173
- # Initialize LanguageTool with explicit cache
174
- # try:
175
- # grammar_tool = LanguageTool(
176
- # 'en-US',
177
- # config={
178
- # 'cacheDir': os.getenv('LT_CACHE', '/tmp/languagetool')
179
- # },
180
- # remote_server='https://api.languagetool.org' # Remote server as separate parameter
181
- # )
182
- # print("LanguageTool initialized successfully")
183
- # except Exception as e:
184
- # print(f"Error initializing LanguageTool: {e}")
185
- # grammar_tool = None
186
- # Configure LanguageTool cache
187
- lt_cache = os.getenv('LT_CACHE', '/app/cache/languagetool')
188
- Path(lt_cache).mkdir(parents=True, exist_ok=True)
189
-
190
  try:
191
- # Option 1: Force remote server (recommended)
192
  grammar_tool = LanguageTool(
193
  'en-US',
194
  remote_server='https://api.languagetool.org'
195
  )
196
-
197
- # Option 2: Local server with explicit cache (if really needed)
198
- # grammar_tool = LanguageTool(
199
- # 'en-US',
200
- # config={
201
- # 'cacheDir': lt_cache,
202
- # 'server': 'https://api.languagetool.org'
203
- # }
204
- # )
205
-
206
  print("LanguageTool initialized successfully")
207
  except Exception as e:
208
  print(f"Error initializing LanguageTool: {e}")
209
  grammar_tool = None
210
 
 
 
211
 
212
  # Load Hugging Face models
213
  MODEL_NAME = "Hak978/aes-bert-models"
@@ -234,92 +205,193 @@ except Exception as e:
234
  print(f"Error loading models: {e}")
235
  model_website1 = model_website2 = tokenizer = None
236
 
237
- def check_spelling(text):
238
- words = text.split()
239
- misspelled = spell.unknown(words)
240
- return list(misspelled)
241
-
242
- def check_grammar(text):
243
- matches = grammar_tool.check(text)
244
- return [{'message': match.message, 'replacements': match.replacements} for match in matches]
245
-
246
- def count_words(text):
247
- words = text.split()
248
- return len(words)
249
-
250
- def calculate_sentence_lengths(text):
251
- sentences = text.split('.')
252
- lengths = [len(sentence.split()) for sentence in sentences if sentence.strip()]
253
- return {
254
- 'average': np.mean(lengths) if lengths else 0,
255
- 'min': min(lengths) if lengths else 0,
256
- 'max': max(lengths) if lengths else 0
 
257
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
- def calculate_vocabulary_diversity(text):
260
- words = text.lower().split()
261
- unique_words = set(words)
262
- return len(unique_words) / len(words) if words else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
 
264
- def count_punctuation(text):
265
- return sum(1 for char in text if char in string.punctuation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
- def predict_score(text, model, tokenizer):
268
- # Tokenize and prepare input
269
- inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
270
 
271
- # Get model prediction
272
- with torch.no_grad():
273
- outputs = model(**inputs)
274
- predictions = outputs.logits
275
 
276
- # Convert prediction to score (assuming regression model)
277
- predicted_score = predictions.item()
278
- return predicted_score
 
 
 
279
 
280
- @app.route('/')
281
- def home():
282
- return render_template('index.html')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
- @app.route('/analyze', methods=['POST'])
285
- def analyze():
286
  if request.method == 'POST':
287
- essay_text = request.form['essay']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
- # Basic statistics
290
- word_count = count_words(essay_text)
291
- sentence_stats = calculate_sentence_lengths(essay_text)
292
- vocabulary_diversity = calculate_vocabulary_diversity(essay_text)
293
- punctuation_count = count_punctuation(essay_text)
294
-
295
- # Spelling and grammar checks
296
- spelling_errors = check_spelling(essay_text)
297
- grammar_errors = check_grammar(essay_text)
298
-
299
- # Model predictions
300
- score1 = predict_score(essay_text, model_website1, tokenizer_website1)
301
- score2 = predict_score(essay_text, model_website2, tokenizer_website2)
302
-
303
- # Calculate average score
304
- average_score = (score1 + score2) / 2
305
-
306
- # Prepare feedback
307
- feedback = {
308
- 'word_count': word_count,
309
- 'avg_sentence_length': round(sentence_stats['average'], 2),
310
- 'min_sentence_length': int(sentence_stats['min']),
311
- 'max_sentence_length': int(sentence_stats['max']),
312
- 'vocabulary_diversity': round(vocabulary_diversity * 100, 2),
313
- 'punctuation_count': punctuation_count,
314
- 'spelling_errors': spelling_errors,
315
- 'grammar_errors': grammar_errors,
316
- 'score1': round(score1, 2),
317
- 'score2': round(score2, 2),
318
- 'average_score': round(average_score, 2)
319
- }
320
-
321
- return render_template('result.html', feedback=feedback)
322
 
323
  if __name__ == '__main__':
324
- port = int(os.environ.get('PORT', 7860)) # Changed from 5000 to 7860 for Spaces
325
  app.run(host='0.0.0.0', port=port)
 
155
  import os
156
  from pathlib import Path
157
 
158
# Flask application; templates are served from the project root rather
# than a templates/ subdirectory.
app = Flask(__name__, template_folder='.')

# Configure cache directories (the host may override via XDG_CACHE_HOME).
cache_base = os.getenv('XDG_CACHE_HOME', '/tmp/cache')
huggingface_cache = os.path.join(cache_base, 'huggingface')
languagetool_cache = os.path.join(cache_base, 'languagetool')
for cache_dir in (huggingface_cache, languagetool_cache):
    Path(cache_dir).mkdir(parents=True, exist_ok=True)

# Initialize LanguageTool against the public remote server (avoids
# spawning a local Java backend). Fall back to None so the app still
# boots when the service is unreachable; callers check for None.
try:
    grammar_tool = LanguageTool(
        'en-US',
        remote_server='https://api.languagetool.org'
    )
    print("LanguageTool initialized successfully")
except Exception as e:
    print(f"Error initializing LanguageTool: {e}")
    grammar_tool = None

# Initialize SpellChecker
spell = SpellChecker()
182
 
183
  # Load Hugging Face models
184
  MODEL_NAME = "Hak978/aes-bert-models"
 
205
  print(f"Error loading models: {e}")
206
  model_website1 = model_website2 = tokenizer = None
207
 
208
def tokenize_text(text, tokenizer):
    """Encode *text* for a BERT-style model.

    Returns a ``(input_ids, attention_mask)`` pair of PyTorch tensors,
    truncated/padded to a fixed length of 512 tokens.
    """
    encode_kwargs = dict(
        add_special_tokens=True,
        max_length=512,
        truncation=True,
        return_token_type_ids=False,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )
    encoded = tokenizer.encode_plus(text, **encode_kwargs)
    return encoded['input_ids'], encoded['attention_mask']
220
+
221
# Rubric ranges per scoring category. 'threshold' is kept for parity with
# the original tuning but is not used by the normalization below.
_SCORE_PARAMS = {
    'grammar': {'min': 1, 'max': 8, 'threshold': 0.8},
    'lexical': {'min': 1, 'max': 8, 'threshold': 0.8},
    'global_organization': {'min': 3, 'max': 8, 'threshold': 0.6},
    'local_organization': {'min': 3, 'max': 8, 'threshold': 0.6},
    'supporting_ideas': {'min': 3, 'max': 8, 'threshold': 0.6},
    'holistic': {'min': 1, 'max': 5, 'threshold': 0.9}
}

# Cache of per-essay error densities. grammar_tool.check() goes to a remote
# LanguageTool server, and normalize_bert_score is called once per category
# for the same essay — without this cache that is six identical remote calls.
_ERROR_DENSITY_CACHE = {}


def _essay_error_density(essay):
    """Return (grammar errors + spelling errors) / word count, cached per essay.

    An empty essay yields a density of 1 (maximum penalty), matching the
    original behaviour.
    """
    if essay not in _ERROR_DENSITY_CACHE:
        words = essay.split()
        grammar_errors = len(grammar_tool.check(essay)) if grammar_tool else 0
        spelling_errors = len(spell.unknown(words)) if spell else 0
        density = (grammar_errors + spelling_errors) / len(words) if words else 1
        if len(_ERROR_DENSITY_CACHE) > 128:  # keep the cache bounded
            _ERROR_DENSITY_CACHE.clear()
        _ERROR_DENSITY_CACHE[essay] = density
    return _ERROR_DENSITY_CACHE[essay]


def normalize_bert_score(raw_score, category, essay):
    """Map a sigmoid model output in [0, 1] onto the rubric scale for *category*.

    raw_score -- sigmoid-activated model output for this category.
    category  -- one of the keys of _SCORE_PARAMS (KeyError otherwise).
    essay     -- essay text; its error density penalizes the
                 grammar/lexical/holistic categories.

    Returns the score rounded to one decimal, clamped to the category range.
    """
    bounds = _SCORE_PARAMS[category]
    penalty = _essay_error_density(essay) * 7  # empirically chosen weight

    base_score = bounds['min'] + (raw_score * (bounds['max'] - bounds['min']))
    # Only the error-sensitive categories are penalized for errors.
    if category in ('grammar', 'lexical', 'holistic'):
        base_score = max(bounds['min'], base_score - penalty)

    return round(max(bounds['min'], min(bounds['max'], base_score)), 1)
245
 
246
def get_predictions_website1(essays):
    """Score each essay in *essays* across the six rubric categories.

    Returns a list with one entry per essay, each a list of six normalized
    scores in the order: grammar, lexical, global_organization,
    local_organization, supporting_ideas, holistic. Returns [] when the
    model or tokenizer failed to load at startup.
    """
    if not model_website1 or not tokenizer:
        return []

    input_ids = []
    attention_masks = []
    for essay in essays:
        ids, mask = tokenize_text(essay, tokenizer)
        input_ids.append(ids)
        attention_masks.append(mask)

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)

    model_website1.eval()
    with torch.no_grad():
        outputs = model_website1(input_ids, attention_mask=attention_masks)
        raw_predictions = outputs.logits.cpu().numpy()

    categories = ['grammar', 'lexical', 'global_organization',
                  'local_organization', 'supporting_ideas', 'holistic']

    normalized_predictions = []
    # BUG FIX: the original normalized every prediction row against
    # essays[0]; pair each prediction with its own essay instead.
    for raw_pred, essay in zip(raw_predictions, essays):
        raw_scores = 1 / (1 + np.exp(-raw_pred))  # sigmoid activation
        normalized_predictions.append([
            normalize_bert_score(score, category, essay)
            for score, category in zip(raw_scores, categories)
        ])

    return normalized_predictions
279
 
280
def calculate_grammar_score(essay):
    """Heuristic grammar score on a 2-10 scale.

    Returns None when the LanguageTool backend is unavailable.
    """
    if not grammar_tool:
        return None

    matches = grammar_tool.check(essay)

    # Weight error categories by perceived severity; unknown categories
    # get a middle weight of 1.5.
    severity = {
        'SPELLING': 2.0,
        'GRAMMAR': 2.5,
        'PUNCTUATION': 1.5,
        'TYPOGRAPHY': 1.0
    }
    weighted_errors = sum(severity.get(m.category, 1.5) for m in matches)

    word_total = len(essay.split())
    if word_total > 0:
        error_density = (weighted_errors / word_total) * 100
    else:
        error_density = 100  # empty essay: treat as maximal density

    # Extra penalty for error categories that recur more than twice.
    category_counts = Counter(m.category for m in matches)
    repeat_penalty = sum(n * 0.3 for n in category_counts.values() if n > 2)

    final_score = (10 - error_density * 0.7) - repeat_penalty
    return round(max(2, min(10, final_score)), 1)
306
 
307
def calculate_spelling_score(essay):
    """Heuristic spelling score on a 2-10 scale.

    Tokens are stripped of surrounding punctuation before being checked.
    Tokens that were punctuation-only are dropped — the original kept the
    resulting empty strings, which were counted as words and could be
    flagged unknown, inflating the error rate. An empty essay scores 2
    (error_rate defaults to 1).
    """
    words = [w for w in (token.strip('.,!?()[]{}":;') for token in essay.split()) if w]
    misspelled = spell.unknown(words) if spell else []

    total_words = len(words)
    error_count = len(misspelled)
    error_rate = error_count / total_words if total_words > 0 else 1

    # Base penalty proportional to the error rate, plus 0.5 for every
    # error beyond the first five.
    error_penalty = error_rate * 20
    if error_count > 5:
        error_penalty += (error_count - 5) * 0.5

    spelling_score = 10 - error_penalty
    return round(max(2, min(10, spelling_score)), 1)
321
 
322
def calculate_word_diversity(essay):
    """Score vocabulary variety on a 5-10 scale.

    Returns a neutral 7.0 for essays with no usable words.
    """
    strip_punct = str.maketrans('', '', string.punctuation)
    tokens = essay.lower().translate(strip_punct).split()
    if not tokens:
        return 7.0

    # Misspellings reduce the diversity score proportionally.
    misspelled = spell.unknown(tokens) if spell else []
    spelling_penalty = len(misspelled) / len(tokens) * 5

    stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at',
                  'to', 'for', 'of', 'with', 'by'}
    content = [t for t in tokens if t not in stop_words]
    if not content:
        return 7.0

    unique = len(set(content))
    frequencies = Counter(content)
    overused = sum(1 for n in frequencies.values() if n > 2)

    diversity_ratio = unique / len(content)
    repetition_penalty = min(1.5, overused / unique)

    score = 8 + (2 * diversity_ratio) - repetition_penalty - spelling_penalty
    return round(max(5, min(10, score)), 1)
348
+
349
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the scoring page; on POST, score the submitted essay.

    The template receives the original essay plus every score. Any score
    that could not be computed (model not loaded, LanguageTool down)
    stays None.
    """
    context = {
        'essay': '',
        'grammar_score': None,
        'lexical_score': None,
        'global_organization_score': None,
        'local_organization_score': None,
        'supporting_ideas_score': None,
        'holistic_score': None,
        'grammar_score2': None,
        'spelling_score': None,
        'word_diversity_score': None,
        'essay_quality_score': None
    }

    if request.method == 'POST':
        # .get avoids a KeyError/400 when the form field is absent.
        essay = request.form.get('essay', '')
        context['essay'] = essay

        # Website 1: BERT model predictions across six rubric categories.
        predictions_website1 = get_predictions_website1([essay])
        if predictions_website1 and len(predictions_website1[0]) >= 6:
            scores = predictions_website1[0]
            context.update({
                'grammar_score': scores[0],
                'lexical_score': scores[1],
                'global_organization_score': scores[2],
                'local_organization_score': scores[3],
                'supporting_ideas_score': scores[4],
                'holistic_score': min(5.0, scores[5])  # holistic capped at 5
            })

        # Website 2: rule-based heuristic scores.
        context['grammar_score2'] = calculate_grammar_score(essay)
        context['spelling_score'] = calculate_spelling_score(essay)
        context['word_diversity_score'] = calculate_word_diversity(essay)

        # Blend model holistic and heuristic grammar 2:1 into one figure;
        # skipped when either piece is missing (None/0 is falsy).
        if context['holistic_score'] and context['grammar_score2']:
            context['essay_quality_score'] = round(
                (context['holistic_score'] * 2 + context['grammar_score2']) / 3,
                1
            )

    return render_template('index.html', **context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
if __name__ == '__main__':
    # Hugging Face Spaces routes traffic to port 7860 by default.
    port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=port)