# ai-vs-human / predictor.py
# SerialGuy's picture
# Updating UI and Some fixes
# 167a41f
import numpy as np
import pandas as pd
import textstat
import joblib
# Load the persisted classifier and text vectorizer once, at import time.
# Both paths are relative, so they resolve against the current working
# directory of the process that imports this module.
# NOTE(review): joblib.load unpickles the file contents; only load model
# files that come from a trusted source.
# `model` is used further down through .predict() and .predict_proba().
model = joblib.load("Models/ai_detector_model.pkl")
# `vectorizer` is used further down through .transform() on the raw text.
vectorizer = joblib.load("Models/vectorizer.pkl")
def calculate_readability(text):
    """Return the Flesch Reading Ease score of ``text``.

    The score is whatever ``textstat.flesch_reading_ease`` reports for the
    given string; no pre- or post-processing is applied here.
    """
    score = textstat.flesch_reading_ease(text)
    return score
def lexical_diversity(text):
    """Return the ratio of distinct tokens to total tokens in ``text``.

    Tokens are the whitespace-separated pieces of ``text`` and are compared
    exactly as written, so letter case and attached punctuation make two
    tokens different.  Text that contains no tokens yields 0.
    """
    tokens = text.split()
    if not tokens:
        # Nothing to count; avoid dividing by zero.
        return 0
    distinct = set(tokens)
    return len(distinct) / len(tokens)
def sentence_length(text):
    """Return the mean number of words per '.'-delimited fragment of ``text``.

    ``text`` is cut at every period and the whitespace-separated words of
    each fragment are counted.  Every fragment counts toward the divisor,
    including empty ones produced by a trailing period or by consecutive
    periods, so ``"Hello world."`` averages 1.0 rather than 2.0.

    NOTE(review): this counting is kept as-is because the persisted model's
    features were presumably computed the same way -- confirm before
    changing it.
    """
    fragments = text.split('.')
    if not fragments:
        # str.split with a separator always yields at least one piece, so
        # this guard only matters for unusual inputs.
        return 0
    word_total = 0
    for fragment in fragments:
        word_total += len(fragment.split())
    return word_total / len(fragments)
def preprocess_text(text):
    """Build the model's input row for a single piece of text.

    The row is the dense TF-IDF vector produced by the module-level
    ``vectorizer`` followed by three hand-crafted features, in this order:
    readability, lexical_diversity, sentence_length.

    Returns a 2-D array with one row.
    """
    # A single-row frame holds the text and its derived feature columns.
    frame = pd.DataFrame({'text': [text]})

    # Column name -> function computing that column from the raw text.
    # Insertion order fixes the order of the trailing feature columns.
    extractors = {
        'readability': calculate_readability,
        'lexical_diversity': lexical_diversity,
        'sentence_length': sentence_length,
    }
    for column, extractor in extractors.items():
        frame[column] = frame['text'].apply(extractor)

    # TF-IDF representation of the text, densified so it can be stacked.
    tfidf_dense = vectorizer.transform(frame['text']).toarray()

    # Append the hand-crafted features to the right of the TF-IDF columns.
    extra_features = frame[list(extractors)].values
    return np.hstack((tfidf_dense, extra_features))
def predict_text(text):
    """Classify ``text`` with the module-level ``model``.

    Returns a ``(prediction, confidence)`` tuple where ``prediction`` is the
    first element of ``model.predict`` for the text's feature row and
    ``confidence`` is the value in column 1 of ``model.predict_proba`` for
    that same row.

    NOTE(review): ``confidence`` is always taken from probability column 1,
    whichever label was predicted -- presumably the probability of the
    positive class; confirm against how callers display it.
    """
    features = preprocess_text(text)
    label = model.predict(features)[0]
    probabilities = model.predict_proba(features)
    # Select probability column 1, then the single row for this text.
    class_one_column = probabilities[:, 1]
    score = class_one_column[0]
    return label, score