Spaces:
Running
Running
| import numpy as np | |
| import pandas as pd | |
| import textstat | |
| import joblib | |
# Deserialize the fitted vectorizer and the trained classifier once, at import
# time, so every prediction call reuses the same in-memory objects.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only point these paths at artifacts you produced or trust.
vectorizer = joblib.load("Models/vectorizer.pkl")
model = joblib.load("Models/ai_detector_model.pkl")
def calculate_readability(text):
    """Return the Flesch Reading Ease score of ``text``.

    Thin wrapper around ``textstat.flesch_reading_ease``; the score is passed
    back to the caller unchanged.
    """
    score = textstat.flesch_reading_ease(text)
    return score
def lexical_diversity(text):
    """Return the ratio of distinct tokens to total tokens in ``text``.

    Tokens are the whitespace-separated chunks of ``text`` taken verbatim, so
    differences in case or attached punctuation make two tokens distinct.
    Text that contains no tokens yields 0.
    """
    tokens = text.split()
    if not tokens:
        return 0
    return len(set(tokens)) / len(tokens)
def sentence_length(text):
    """Return the mean number of whitespace-separated words per segment.

    Segments are the pieces of ``text`` between '.' characters. Empty pieces
    (for example the one that follows a trailing period) are still counted as
    segments with zero words, so they pull the average down.
    """
    segments = text.split('.')
    if not segments:
        return 0
    word_total = 0
    for segment in segments:
        word_total += len(segment.split())
    return word_total / len(segments)
def preprocess_text(text):
    """Build the model input row for a single piece of text.

    The row is the dense output of the module-level ``vectorizer`` (described
    in this file as TF-IDF) followed by three hand-crafted features, in this
    order: readability, lexical diversity, average sentence length.

    Returns the horizontally stacked array for this one document.
    """
    # Hand-crafted features as a single row; column order must stay
    # readability, lexical_diversity, sentence_length.
    handcrafted = np.array([[
        calculate_readability(text),
        lexical_diversity(text),
        sentence_length(text),
    ]])

    # Vectorize the one-document batch and densify it so it can be stacked
    # next to the hand-crafted columns.
    tfidf_dense = vectorizer.transform([text]).toarray()

    return np.hstack((tfidf_dense, handcrafted))
def predict_text(text):
    """Classify ``text`` with the loaded model.

    Returns a ``(prediction, confidence)`` tuple. ``prediction`` is the first
    element of ``model.predict`` for the text. ``confidence`` is the value in
    the second column of ``model.predict_proba`` for the text, regardless of
    which label was predicted.
    """
    features = preprocess_text(text)
    label = model.predict(features)[0]
    # Row 0 is the only sample; column 1 is the second class's probability.
    second_class_probability = model.predict_proba(features)[0, 1]
    return label, second_class_probability