# AssessingSocialMedia / bot_detector.py
# Uploaded by waqasbm ("Create bot_detector.py", commit 9e6c511, verified)
# bot_detector.py
import re
import numpy as np
from datetime import datetime
from sklearn.cluster import DBSCAN
# Feature Extraction Functions
def is_generic_name(username):
    """Heuristic: flag usernames that look auto-generated.

    Returns True for all-lowercase letters followed by three or more
    digits (e.g. "john12345"), or any username containing "user"
    (case-insensitive).
    """
    looks_autogenerated = re.match(r'^[a-z]+\d{3,}$', username) is not None
    contains_user = 'user' in username.lower()
    return looks_autogenerated or contains_user
def is_new_account(created_date):
    """Return True when the account was created less than 7 days ago.

    A missing/None creation date is treated as not-new.
    NOTE(review): assumes created_date is a naive datetime comparable
    with datetime.now() — confirm against the upstream data source.
    """
    if not created_date:
        return False
    account_age = datetime.now() - created_date
    return account_age.days < 7
def detect_generic_text(comments):
    """Return the subset of comments containing a known boilerplate phrase.

    Matching is case-insensitive and substring-based; original comment
    strings are returned unmodified, in their original order.
    """
    generic_phrases = ('great post!', 'awesome', 'nice', 'cool', 'thanks for sharing')
    flagged = []
    for comment in comments:
        lowered = comment.lower()
        if any(phrase in lowered for phrase in generic_phrases):
            flagged.append(comment)
    return flagged
def analyze_timing(timestamps):
    """Return (mean, std) of the gaps between consecutive timestamps.

    Bot-driven engagement tends to show very small, very regular gaps
    (low mean interval, near-zero standard deviation).

    Args:
        timestamps: iterable of numeric timestamps, in any order.

    Returns:
        Tuple of floats (mean_interval, std_dev). Returns (0.0, 0.0)
        when fewer than two timestamps are supplied — previously
        np.diff produced an empty array and np.mean/np.std returned
        NaN with a RuntimeWarning in that case.
    """
    ordered = sorted(timestamps)
    # Guard the 0- and 1-element cases: no intervals exist to analyze.
    if len(ordered) < 2:
        return 0.0, 0.0
    deltas = np.diff(ordered)
    return float(np.mean(deltas)), float(np.std(deltas))
def cluster_engagers(engagements):
    """Group engaging accounts by profile similarity via DBSCAN.

    Each account is described by (followers, posts_count,
    engagement_freq); DBSCAN labels dense groups of near-identical
    profiles (typical of coordinated bot farms) and marks outliers -1.

    NOTE(review): features are not standardized, so eps=0.5 is dominated
    by the largest-magnitude field — verify whether scaling is intended.
    """
    feature_matrix = []
    for engager in engagements:
        feature_matrix.append([
            engager['followers'],
            engager['posts_count'],
            engager['engagement_freq'],
        ])
    model = DBSCAN(eps=0.5, min_samples=3)
    return model.fit_predict(feature_matrix)
# Main Detection Function
def analyze_post(post_url):
    """Run the full bot-detection pipeline against one post.

    Fetches the post's engagements, then layers profile, timing,
    content, and cluster analyses into a single results dict.

    NOTE(review): the top-level 'duplicate_comments' list is never
    populated — duplicates land under content_analysis instead;
    confirm which key downstream consumers read.
    """
    # Data collection would happen here
    engagements = fetch_engagements(post_url)

    results = {
        'suspicious_profiles': [],
        'duplicate_comments': [],
        'time_analysis': {},
        'content_analysis': {},
    }
    if not engagements:
        return results

    # Profile analysis: generic-looking names or very young accounts.
    suspicious = []
    for engager in engagements:
        if is_generic_name(engager['username']) or is_new_account(engager['created_at']):
            suspicious.append(engager)
    results['suspicious_profiles'] = suspicious

    # Timing analysis: regular, rapid-fire intervals suggest automation.
    timestamps = [e['timestamp'] for e in engagements if e['timestamp']]
    if timestamps:
        mean_interval, std_dev = analyze_timing(timestamps)
        results['time_analysis']['mean_interval'] = mean_interval
        results['time_analysis']['std_dev'] = std_dev

    # Content analysis: boilerplate phrases and copy-pasted comments.
    comments = [e['comment'] for e in engagements if e['comment']]
    results['content_analysis']['generic_comments'] = detect_generic_text(comments)
    results['content_analysis']['duplicate_comments'] = find_duplicates(comments)

    # Cluster analysis: dense groups of near-identical engager profiles.
    results['cluster_analysis'] = cluster_engagers(engagements).tolist()

    return results