# Scraped page header (not program code): imseldrith's picture
# Update app.py
# 6ecdd5a
import gradio as gr
import spacy
import os
import re
# Load the small English spaCy pipeline used by the detector.
# Try the installed model first and download only when spaCy reports it
# missing (spacy.load raises OSError in that case).  This avoids hitting the
# network on every start and avoids relying on whichever ``python`` happens
# to be first on PATH.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download

    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
def detect_ai_content(text):
    """Score *text* with a stop-word heuristic and return it with a cleaned copy.

    The "AI-generated" percentage is the share of word tokens that are in
    spaCy's default stop-word list.  It is a rough heuristic, not a trained
    detector.

    Args:
        text: The raw input string.  ``None`` is treated as an empty string.

    Returns:
        A dict with two keys:

        * ``"text"`` -- the input with special characters removed and runs
          of whitespace collapsed to single spaces.
        * ``"percentage"`` -- the score formatted with two decimals as
          ``"<value>% AI-generated content"``.
    """
    if text is None:
        text = ""

    # Analyze the text using spaCy.
    doc = nlp(text)

    # Take both counts from the same tokenization.  Punctuation and
    # whitespace tokens are skipped: they are not words, and counting them
    # against a whitespace-split word count could push the score below 0%.
    word_tokens = [
        token for token in doc if not (token.is_punct or token.is_space)
    ]
    word_count = len(word_tokens)
    non_stopword_count = sum(1 for token in word_tokens if not token.is_stop)

    # Empty or punctuation-only input has no words; avoid dividing by zero.
    if word_count:
        percentage_ai = (1 - non_stopword_count / word_count) * 100
    else:
        percentage_ai = 0.0

    # Remove special characters first, then collapse whitespace, so that a
    # removed character cannot leave a double or trailing space behind.
    cleaned_text = re.sub(r'[^\w\s]', '', text)
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()

    return {
        "text": cleaned_text,
        "percentage": f"{percentage_ai:.2f}% AI-generated content",
    }
# Build the web UI (one text input, JSON output) and start serving it.
demo = gr.Interface(fn=detect_ai_content, inputs="text", outputs="json")
demo.launch()