Spaces:
Sleeping
Sleeping
| # bot_detector.py | |
| import re | |
| import numpy as np | |
| from datetime import datetime | |
| from sklearn.cluster import DBSCAN | |
| # Feature Extraction Functions | |
def is_generic_name(username):
    """Return True when *username* looks auto-generated or generic.

    A name is flagged when it consists solely of lowercase ASCII letters
    followed by three or more digits (e.g. ``john12345``), or when it
    contains the substring ``user`` in any letter case.

    Args:
        username: The account handle to inspect. ``None`` or an empty
            string is reported as not generic instead of raising.

    Returns:
        bool: True if either heuristic matches, otherwise False.
    """
    # A missing handle would otherwise make re.match raise TypeError.
    if not username:
        return False
    # The digit-suffix pattern only matches lowercase letters, whereas the
    # 'user' substring check ignores case.
    has_digit_suffix = re.match(r'^[a-z]+\d{3,}$', username) is not None
    return has_digit_suffix or 'user' in username.lower()
def is_new_account(created_date, *, max_age_days=7):
    """Return True when the account is fewer than *max_age_days* days old.

    Args:
        created_date: ``datetime`` at which the account was created, either
            naive or timezone-aware. A falsy value (e.g. ``None``) yields
            False.
        max_age_days: Age threshold in whole days; accounts whose age in
            days is strictly below this value count as new. Defaults to 7.

    Returns:
        bool: True if the account age is below the threshold, else False.
        A creation date in the future also counts as new, because its age
        in days is negative.
    """
    if not created_date:
        return False
    # Subtracting an aware datetime from a naive "now" raises TypeError, so
    # take "now" in the same timezone as the input (naive stays naive).
    tz = created_date.tzinfo if created_date.utcoffset() is not None else None
    age = datetime.now(tz) - created_date
    return age.days < max_age_days
def detect_generic_text(comments):
    """Pick out the comments that contain a stock, low-effort phrase.

    Matching is a case-insensitive substring test against a fixed phrase
    list, so a phrase also matches inside a longer word.

    Args:
        comments: Iterable of comment strings.

    Returns:
        list: The matching comments, unmodified and in their input order.
    """
    stock_phrases = ('great post!', 'awesome', 'nice', 'cool', 'thanks for sharing')
    flagged = []
    for text in comments:
        lowered = text.lower()
        for phrase in stock_phrases:
            if phrase in lowered:
                flagged.append(text)
                # One hit is enough; do not add the same comment twice.
                break
    return flagged
def analyze_timing(timestamps):
    """Summarise the gaps between consecutive timestamps.

    The timestamps are sorted first, so input order does not matter.

    Args:
        timestamps: Iterable of sortable values that support subtraction;
            presumably numeric (e.g. epoch seconds) — TODO confirm against
            the caller.

    Returns:
        tuple: ``(mean, std)`` of the differences between successive sorted
        timestamps, where ``std`` is NumPy's default (population) standard
        deviation. Both values are NaN when fewer than two timestamps are
        given, since no interval exists.
    """
    ordered = sorted(timestamps)
    # With fewer than two points the diff is empty; np.mean/np.std would
    # emit RuntimeWarnings on their way to NaN, so return NaN explicitly.
    if len(ordered) < 2:
        return np.nan, np.nan
    gaps = np.diff(ordered)
    return np.mean(gaps), np.std(gaps)
def cluster_engagers(engagements, eps=0.5, min_samples=3):
    """Group engagers with similar profiles using DBSCAN.

    Each engagement contributes one feature row built from its
    ``'followers'``, ``'posts_count'`` and ``'engagement_freq'`` values.

    Args:
        engagements: Iterable of mappings providing the three keys above.
        eps: Neighbourhood radius passed to ``DBSCAN``. Defaults to 0.5.
        min_samples: Minimum neighbourhood size passed to ``DBSCAN``.
            Defaults to 3.

    Returns:
        numpy.ndarray: One cluster label per engagement, in input order
        (DBSCAN marks noise points with -1). An empty integer array is
        returned when there are no engagements.

    Raises:
        KeyError: If an engagement lacks one of the feature keys.
    """
    # NOTE(review): features are fed to DBSCAN unscaled, so with the default
    # eps only rows that are nearly identical can become neighbours —
    # confirm this is intended.
    features = [
        [e['followers'], e['posts_count'], e['engagement_freq']]
        for e in engagements
    ]
    # DBSCAN rejects an empty sample set, so short-circuit here.
    if not features:
        return np.empty(0, dtype=int)
    return DBSCAN(eps=eps, min_samples=min_samples).fit_predict(features)
| # Main Detection Function | |
def analyze_post(post_url):
    """Run the bot-detection heuristics over the engagements of one post.

    Relies on ``fetch_engagements`` and ``find_duplicates``, which are
    expected to be defined elsewhere in this module.

    Args:
        post_url: Identifier of the post, handed to ``fetch_engagements``.

    Returns:
        dict: Always contains the keys ``'suspicious_profiles'``,
        ``'duplicate_comments'``, ``'time_analysis'``, ``'content_analysis'``
        and ``'cluster_analysis'``. When no engagements are found, every
        value is an empty list or dict.

    Raises:
        KeyError: If an engagement lacks one of the fields read below.
    """
    # Data collection would happen here
    engagements = fetch_engagements(post_url)

    # Analysis pipeline. 'cluster_analysis' is pre-seeded so the result has
    # the same shape whether or not any engagements were found.
    results = {
        'suspicious_profiles': [],
        'duplicate_comments': [],
        'time_analysis': {},
        'content_analysis': {},
        'cluster_analysis': [],
    }
    if not engagements:
        return results

    # Profile analysis
    results['suspicious_profiles'] = [
        e for e in engagements
        if is_generic_name(e['username']) or is_new_account(e['created_at'])
    ]

    # Timing analysis (entries with a falsy timestamp are skipped)
    timestamps = [e['timestamp'] for e in engagements if e['timestamp']]
    if timestamps:
        mean_interval, std_dev = analyze_timing(timestamps)
        results['time_analysis']['mean_interval'] = mean_interval
        results['time_analysis']['std_dev'] = std_dev

    # Content analysis (entries with a falsy comment are skipped)
    comments = [e['comment'] for e in engagements if e['comment']]
    results['content_analysis']['generic_comments'] = detect_generic_text(comments)
    duplicates = find_duplicates(comments)
    results['content_analysis']['duplicate_comments'] = duplicates
    # The top-level key was declared but never filled in; expose the same
    # object there too, keeping the nested key for existing callers.
    results['duplicate_comments'] = duplicates

    # Cluster analysis
    results['cluster_analysis'] = cluster_engagers(engagements).tolist()
    return results