Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from transformers import GPT2Tokenizer, GPT2LMHeadModel | |
| import torch | |
| import nltk | |
| from nltk.util import ngrams | |
| from nltk.probability import FreqDist | |
| import plotly.express as px | |
| import torch.nn.functional as F | |
| from collections import Counter | |
| from nltk.corpus import stopwords | |
| import string | |
| import nltk | |
# Fetch the NLTK data this app relies on: tokenizer models for
# nltk.word_tokenize and the English stop-word list.
# 'punkt_tab' is the tokenizer resource looked up by newer NLTK releases;
# 'punkt' is still requested so older installations keep working.
# NOTE(review): on an NLTK version that does not know 'punkt_tab' the download
# is expected to just report failure rather than raise - confirm on the
# pinned NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
# Initialize tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
def c_perplexity(text):
    """Calculate the perplexity of the given text using GPT-2.

    Args:
        text: The text to score.

    Returns:
        ``exp`` of the mean cross-entropy between the model's logits and the
        encoded token ids, as a Python float. ``float('inf')`` is returned
        when the text is blank or encodes to zero tokens.
    """
    if not text.strip():
        return float('inf')  # Return inf for empty input

    input_ids = tokenizer.encode(text, add_special_tokens=False, return_tensors='pt')
    if input_ids.size(1) == 0:  # Check for empty input after encoding
        return float('inf')

    # GPT-2 only has a fixed number of position embeddings; feeding a longer
    # sequence makes the forward pass fail. Score the leading window instead
    # of crashing on long inputs.
    max_positions = getattr(model.config, 'n_positions', None)
    if max_positions and input_ids.size(1) > max_positions:
        input_ids = input_ids[:, :max_positions]

    with torch.no_grad():
        logits = model(input_ids).logits

    # NOTE(review): the logits at position i are compared with the token at
    # position i (no one-step shift), so this is not the conventional
    # next-token perplexity. It is left unchanged because callers compare the
    # result against thresholds tuned for this exact computation.
    loss = F.cross_entropy(logits.view(-1, logits.size(-1)), input_ids.reshape(-1))
    return torch.exp(loss).item()
def c_burstiness(text):
    """Return the fraction of distinct tokens that occur more than once.

    The text is lower-cased and split with ``nltk.word_tokenize``. The score
    is the number of distinct tokens seen at least twice divided by the
    number of distinct tokens; text that yields no tokens scores ``0.0``.
    """
    words = nltk.word_tokenize(text.lower())
    if not words:
        return 0.0

    frequencies = FreqDist(words)
    distinct_total = len(frequencies)
    if distinct_total <= 0:
        return 0.0

    # Count the distinct tokens that appear two or more times.
    repeated_total = len([word for word, seen in frequencies.items() if seen > 1])
    return repeated_total / distinct_total
def top_repword_count(text):
    """Generate a bar chart of the top 10 most repeated words.

    The text is lower-cased and tokenized with ``nltk.word_tokenize``; English
    stop words and tokens without any letter or digit are discarded. The ten
    most frequent remaining tokens are drawn as a Plotly bar chart in the
    current Streamlit container. When nothing remains, a short notice is
    written instead.

    Args:
        text: The text to analyse.

    Returns:
        None. The result is rendered through Streamlit.
    """
    stop_words = set(stopwords.words('english'))
    # Require at least one alphanumeric character: a substring test against
    # string.punctuation lets multi-character quote tokens such as `` and ''
    # and non-ASCII punctuation through.
    tokens = [
        token
        for token in nltk.word_tokenize(text.lower())
        if token not in stop_words and any(ch.isalnum() for ch in token)
    ]

    top_words = Counter(tokens).most_common(10)
    if not top_words:
        st.write("No significant words found.")
        return

    words, counts = zip(*top_words)
    fig = px.bar(
        x=words,
        y=counts,
        labels={'x': 'Words', 'y': 'Counts'},
        title="Top 10 Most Repeated Words in the Text",
    )
    # The keyword is `use_container_width`; the previous spelling
    # (`user_container_width`) was not a Streamlit option.
    st.plotly_chart(fig, use_container_width=True)
# Streamlit app configuration
st.set_page_config(layout="wide")
st.title("AI Content Detector")
text_area = st.text_area("Enter your text here!")

# The button is only rendered once some text has been entered; the
# short-circuit `and` keeps that behaviour.
if text_area and st.button("Analyse the content"):
    input_col, score_col, review_col = st.columns([1, 2, 1])

    # Left column: echo the submitted text.
    with input_col:
        st.info("Your input text")
        st.success(text_area)

    # Middle column: scores and the resulting verdict.
    with score_col:
        st.info("Your output score")
        perplexity = c_perplexity(text_area)
        burstiness = c_burstiness(text_area)
        st.success(f"Perplexity score: {perplexity}")
        st.success(f"Burstiness score: {burstiness}")

        flagged_as_ai = perplexity > 40000 or burstiness < 0.24
        if flagged_as_ai:
            st.error("Result: The text is likely AI-generated.")
        else:
            st.success("Result: The text is not AI-generated.")
        st.warning("Disclaimer: AI plagiarism detector apps can assist in identifying potential instances of plagiarism.")

    # Right column: word-frequency chart.
    with review_col:
        st.info("Basic Review")
        top_repword_count(text_area)