Spaces:
Running
Running
| # evaluation.py - дополнительные метрики | |
| import numpy as np | |
| from collections import Counter | |
def evaluate_text_quality(text):
    """Compute heuristic quality metrics for a piece of text.

    Tokens are produced by lowercasing and splitting on whitespace, so
    punctuation stays attached to the neighbouring word.

    Args:
        text: The string to evaluate.

    Returns:
        A dict with the following keys:

        * ``'length'`` - number of characters in ``text``.
        * ``'lexical_diversity'`` - unique tokens divided by total tokens
          (0.0 when there are no tokens).
        * ``'avg_sentence_len'`` - mean number of tokens per sentence, where
          sentences are split on ``.``, ``!`` and ``?`` (0.0 when there are
          no sentences).
        * ``'repetition_score'`` - ``1 - (count of the most frequent bigram
          / number of bigrams)``; 0.0 when the text has fewer than two
          tokens.
        * ``'total_score'`` - weighted combination of the metrics above,
          scaled to 0..10 and rounded to two decimals.

        All numeric values are built-in ``int``/``float`` objects.
    """
    metrics = {}

    # 1. Text length in characters.
    metrics['length'] = len(text)

    # 2. Lexical diversity: share of distinct tokens.
    words = text.lower().split()
    metrics['lexical_diversity'] = len(set(words)) / len(words) if words else 0.0

    # 3. Average sentence length in tokens. '!' and '?' are folded into '.'
    #    so a single split handles all three terminators; empty fragments
    #    (e.g. from "..." or a trailing period) are dropped.
    normalized = text.replace('!', '.').replace('?', '.')
    sentences = [part.strip() for part in normalized.split('.') if part.strip()]
    if sentences:
        # Plain sum/len keeps the result a built-in float on every path.
        token_total = sum(len(sentence.split()) for sentence in sentences)
        metrics['avg_sentence_len'] = token_total / len(sentences)
    else:
        metrics['avg_sentence_len'] = 0.0

    # 4. Repetition, measured on bigrams of the tokens computed above.
    #    Tokens contain no whitespace, so counting (w1, w2) tuples is
    #    equivalent to counting the space-joined strings.
    bigrams = list(zip(words, words[1:]))
    if bigrams:
        top_count = Counter(bigrams).most_common(1)[0][1]
        metrics['repetition_score'] = 1 - (top_count / len(bigrams))
    else:
        metrics['repetition_score'] = 0.0

    # Final score: length saturates at 100 characters and sentence length at
    # 20 tokens; the weights sum to 1, so the result lies in [0, 10].
    total_score = (
        min(metrics['length'] / 100, 1) * 0.3 +
        metrics['lexical_diversity'] * 0.3 +
        min(metrics['avg_sentence_len'] / 20, 1) * 0.2 +
        metrics['repetition_score'] * 0.2
    ) * 10
    metrics['total_score'] = round(total_score, 2)

    return metrics