Spaces:
Running
Running
File size: 1,710 Bytes
7e47707 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# evaluation.py - дополнительные метрики
import numpy as np
from collections import Counter
def evaluate_text_quality(text):
    """Compute simple heuristic quality metrics for a text.

    Words are the whitespace-separated, lower-cased tokens of ``text``
    (punctuation stays attached to its token). Sentences are the non-empty
    pieces obtained by splitting on '.', '!' and '?'.

    Args:
        text: The string to evaluate.

    Returns:
        A dict with the following keys, all plain Python numbers:
            'length': number of characters in ``text``.
            'lexical_diversity': unique words / total words (0 if no words).
            'avg_sentence_len': mean words per sentence (0 if no sentences).
            'repetition_score': 1 minus the share of the most frequent
                word bigram among all bigrams (0 if fewer than two words).
            'total_score': weighted combination of the metrics above,
                scaled to the range 0..10 and rounded to 2 decimals.
    """
    # 1. Text length in characters.
    metrics = {'length': len(text)}

    # 2. Lexical diversity. The token list is reused for the bigrams below.
    words = text.lower().split()
    metrics['lexical_diversity'] = len(set(words)) / len(words) if words else 0

    # 3. Average sentence length; '!' and '?' are treated like '.'.
    sentences = [
        part.strip()
        for part in text.replace('!', '.').replace('?', '.').split('.')
        if part.strip()
    ]
    if sentences:
        # Plain sum/len keeps the result a built-in float (no numpy scalar).
        word_total = sum(len(sentence.split()) for sentence in sentences)
        metrics['avg_sentence_len'] = word_total / len(sentences)
    else:
        metrics['avg_sentence_len'] = 0

    # 4. Repetition, based on adjacent word pairs (bigrams).
    bigrams = list(zip(words, words[1:]))
    if bigrams:
        top_count = max(Counter(bigrams).values())
        # NOTE(review): when every bigram is distinct this yields
        # 1 - 1/len(bigrams), so a two-word text scores 0. Kept as-is so
        # existing scores do not change; confirm whether this is intended.
        metrics['repetition_score'] = 1 - (top_count / len(bigrams))
    else:
        metrics['repetition_score'] = 0

    # Final score: length saturates at 100 characters and average sentence
    # length at 20 words; the weights sum to 1, so the result is in 0..10.
    total_score = (
        min(metrics['length'] / 100, 1) * 0.3
        + metrics['lexical_diversity'] * 0.3
        + min(metrics['avg_sentence_len'] / 20, 1) * 0.2
        + metrics['repetition_score'] * 0.2
    ) * 10
    metrics['total_score'] = round(total_score, 2)
    return metrics