# textanalysis01 / app.py
# MK-316's picture
# Update app.py
# a01a195 verified
import gradio as gr
import re
import math
# Text metrics computed by this app: TTR, MTLD, Flesch Reading Ease,
# and Flesch-Kincaid Grade Level.
# TTR
def calculate_ttr(text):
    """Return the type-token ratio (TTR) of ``text``, rounded up to 2 decimals.

    Words are maximal runs of ASCII letters and are compared
    case-insensitively. TTR is the number of distinct words divided by the
    total number of words.

    Args:
        text: The text to analyse.

    Returns:
        The TTR as a float, rounded up to two decimal places, or 0.0 when
        the text contains no words.
    """
    words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
    total_words = len(words)
    if total_words == 0:
        return 0.0
    unique_words = len(set(words))
    # Take the ceiling in integer arithmetic. Doing ceil(ratio * 100) on
    # floats over-rounds exact hundredths: 0.07 * 100 == 7.000000000000001,
    # which would be reported as 0.08.
    hundredths = -(-unique_words * 100 // total_words)
    return hundredths / 100
# MTLD
def calculate_mtld(text, ttr_threshold=0.72):
    """Return the Measure of Textual Lexical Diversity (MTLD) of ``text``.

    The word sequence is scanned once forwards and once backwards. In each
    pass a running type-token ratio (TTR) is tracked for the current
    segment; every time it drops below ``ttr_threshold`` one factor is
    counted and the segment starts over. The one-directional score is the
    number of words divided by the number of factors, and the result is the
    mean of both directions, rounded up to two decimal places.

    Args:
        text: The text to analyse. Words are maximal runs of ASCII letters,
            compared case-insensitively.
        ttr_threshold: TTR value below which a factor is considered complete.

    Returns:
        The MTLD as a float rounded up to two decimals, or 0.0 when the text
        contains no words.
    """
    def mtld_calculation(word_list, threshold):
        """Return the one-directional MTLD of ``word_list``."""
        if not word_list:
            return 0
        factor_count = 0.0
        token_count = 0
        # Types seen in the *current* segment only. It must be reset together
        # with token_count, otherwise the TTR numerator keeps counting types
        # from earlier segments and the ratio becomes meaningless.
        segment_types = set()
        for word in word_list:
            token_count += 1
            segment_types.add(word)
            if len(segment_types) / token_count < threshold:
                factor_count += 1
                token_count = 0
                segment_types.clear()
        # Leftover words that did not complete a factor contribute a partial
        # factor proportional to how far their TTR has fallen from 1 towards
        # the threshold. With threshold >= 1 there is no such range, so the
        # remainder contributes nothing (and division by zero is avoided).
        if token_count > 0 and threshold < 1:
            remainder_ttr = len(segment_types) / token_count
            factor_count += (1 - remainder_ttr) / (1 - threshold)
        if factor_count == 0:
            # No repetition at all: treat the whole text as a single factor.
            return len(word_list)
        return len(word_list) / factor_count

    words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
    mtld_forward = mtld_calculation(words, ttr_threshold)
    mtld_backward = mtld_calculation(words[::-1], ttr_threshold)
    mtld = (mtld_forward + mtld_backward) / 2
    # Round up to 2 decimal places. The inner round() strips float noise so
    # that a value such as 700.0000000000001 is not bumped to 701.
    return math.ceil(round(mtld * 100, 8)) / 100
# Flesch Reading Ease
def count_syllables(word):
    """Estimate the number of syllables in ``word`` with a vowel-group heuristic.

    Each run of consecutive vowels (``a e i o u y``) counts as one syllable,
    a trailing ``e`` is treated as silent, and every non-empty word has at
    least one syllable. Matching is case-insensitive.

    Args:
        word: A single word.

    Returns:
        The estimated syllable count; 0 for an empty string.
    """
    # Callers pass words in their original case, so normalise here;
    # otherwise "Idea" and "idea" would get different counts.
    word = word.lower()
    if not word:
        return 0
    vowels = "aeiouy"
    syllable_count = 0
    previous_is_vowel = False
    for char in word:
        is_vowel = char in vowels
        # Only the first vowel of a run starts a new syllable.
        if is_vowel and not previous_is_vowel:
            syllable_count += 1
        previous_is_vowel = is_vowel
    if word.endswith("e"):
        syllable_count -= 1
    return max(syllable_count, 1)
def calculate_flesch_reading_ease(text):
    """Return the Flesch Reading Ease score of ``text``, rounded up to 2 decimals.

    The score is
    ``206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)``.
    Sentences are the non-blank pieces of the text between runs of ``.``,
    ``!`` and ``?``; words are maximal runs of ASCII letters; syllables are
    estimated with ``count_syllables``.

    Args:
        text: The text to analyse.

    Returns:
        The score rounded up to two decimal places, or 0 when the text has
        no words or no sentences.
    """
    # Count only pieces that contain something other than whitespace, so
    # that trailing spaces or newlines after the final punctuation mark do
    # not add a phantom sentence.
    sentences = [piece for piece in re.split(r'[.!?]+', text) if piece.strip()]
    # Lower-case before extracting words so the syllable estimate does not
    # depend on capitalisation.
    words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
    total_sentences = len(sentences)
    total_words = len(words)
    if total_sentences == 0 or total_words == 0:  # Prevent division by zero
        return 0
    total_syllables = sum(count_syllables(word) for word in words)
    flesch_score = (
        206.835
        - 1.015 * (total_words / total_sentences)
        - 84.6 * (total_syllables / total_words)
    )
    # Round up to 2 decimal places; the inner round() strips float noise so
    # an exact hundredth is not bumped to the next one.
    return math.ceil(round(flesch_score * 100, 8)) / 100
def calculate_flesch_kincaid_grade_level(text):
    """Return the Flesch-Kincaid Grade Level of ``text``, rounded up to 2 decimals.

    The grade level is
    ``0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59``.
    Sentences are the non-blank pieces of the text between runs of ``.``,
    ``!`` and ``?``; words are maximal runs of ASCII letters; syllables are
    estimated with ``count_syllables``.

    Args:
        text: The text to analyse.

    Returns:
        The grade level rounded up to two decimal places, or 0 when the text
        has no words or no sentences.
    """
    # Count only pieces that contain something other than whitespace, so
    # that trailing spaces or newlines after the final punctuation mark do
    # not add a phantom sentence.
    sentences = [piece for piece in re.split(r'[.!?]+', text) if piece.strip()]
    # Lower-case before extracting words so the syllable estimate does not
    # depend on capitalisation.
    words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
    total_sentences = len(sentences)
    total_words = len(words)
    if total_sentences == 0 or total_words == 0:  # Prevent division by zero
        return 0
    total_syllables = sum(count_syllables(word) for word in words)
    fk_grade_level = (
        0.39 * (total_words / total_sentences)
        + 11.8 * (total_syllables / total_words)
        - 15.59
    )
    # Round up to 2 decimal places; the inner round() strips float noise so
    # an exact hundredth is not bumped to the next one.
    return math.ceil(round(fk_grade_level * 100, 8)) / 100
def analyze_text(text):
    """Compute every metric shown by the interface for ``text``.

    Args:
        text: The text to analyse.

    Returns:
        A 5-tuple ``(word_count, ttr, mtld, flesch_reading_ease,
        flesch_kincaid_grade_level)``. The word count is the number of
        ASCII-letter runs in the text; the four scores are rounded to two
        decimal places.
    """
    tokens = re.findall(r'\b[a-zA-Z]+\b', text)
    # Evaluate the metrics in the order the interface displays them.
    scores = [
        metric(text)
        for metric in (
            calculate_ttr,
            calculate_mtld,
            calculate_flesch_reading_ease,
            calculate_flesch_kincaid_grade_level,
        )
    ]
    return (len(tokens), *(round(score, 2) for score in scores))
# Gradio UI: one multi-line input box, and one text box per value returned
# by analyze_text, listed in the same order as the returned tuple.
interface = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(lines=10, label="Input Text"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Word Count",
            "Type-Token Ratio (TTR)",
            "Measure of Textual Lexical Diversity (MTLD)",
            "Flesch Reading Ease (Readability measure)",
            "Flesch-Kincaid Grade Level (Readability index)",
        )
    ],
    title="Text Analysis Tool",
    description=(
        "Enter text to analyze its word count, Type-Token Ratio (TTR), "
        "Measure of Textual Lexical Diversity (MTLD), Flesch Reading Ease, "
        "and Flesch-Kincaid Grade Level. "
        "Note: The Flesch-Kincaid Grade Level indicates the U.S. school grade "
        "level needed to understand the text."
    ),
)
# Start the web server for the app.
interface.launch()