# AssessingSocialMedia / bot_detector.py
# Uploaded by waqasbm ("Create bot_detector.py", commit 9e6c511, verified)
# bot_detector.py
import re
import numpy as np
from datetime import datetime
from sklearn.cluster import DBSCAN
# Feature Extraction Functions
def is_generic_name(username):
    """Heuristic: flag usernames that look auto-generated.

    Returns True for all-lowercase letters followed by three or more
    digits (e.g. "john12345"), or any username containing "user"
    (case-insensitive).
    """
    looks_autogenerated = re.match(r'^[a-z]+\d{3,}$', username) is not None
    contains_user = 'user' in username.lower()
    return looks_autogenerated or contains_user
def is_new_account(created_date):
    """Return True when the account was created less than 7 days ago.

    A missing/None creation date is treated as not-new.
    NOTE(review): assumes created_date is a naive datetime comparable
    with datetime.now() — confirm against the upstream data source.
    """
    if not created_date:
        return False
    account_age = datetime.now() - created_date
    return account_age.days < 7
def detect_generic_text(comments):
    """Return the subset of comments containing a known boilerplate phrase.

    Matching is case-insensitive and substring-based; original comment
    strings are returned unmodified, in their original order.
    """
    generic_phrases = ('great post!', 'awesome', 'nice', 'cool', 'thanks for sharing')
    flagged = []
    for comment in comments:
        lowered = comment.lower()
        if any(phrase in lowered for phrase in generic_phrases):
            flagged.append(comment)
    return flagged
def analyze_timing(timestamps):
    """Return (mean, std) of the gaps between consecutive timestamps.

    Bot-driven engagement tends to show very small, very regular gaps
    (low mean interval, near-zero standard deviation).

    Args:
        timestamps: iterable of numeric timestamps, in any order.

    Returns:
        Tuple of floats (mean_interval, std_dev). Returns (0.0, 0.0)
        when fewer than two timestamps are supplied — previously
        np.diff produced an empty array and np.mean/np.std returned
        NaN with a RuntimeWarning in that case.
    """
    ordered = sorted(timestamps)
    # Guard the 0- and 1-element cases: no intervals exist to analyze.
    if len(ordered) < 2:
        return 0.0, 0.0
    deltas = np.diff(ordered)
    return float(np.mean(deltas)), float(np.std(deltas))
def cluster_engagers(engagements):
    """Group engaging accounts by profile similarity via DBSCAN.

    Each account is described by (followers, posts_count,
    engagement_freq); DBSCAN labels dense groups of near-identical
    profiles (typical of coordinated bot farms) and marks outliers -1.

    NOTE(review): features are not standardized, so eps=0.5 is dominated
    by the largest-magnitude field — verify whether scaling is intended.
    """
    feature_matrix = []
    for engager in engagements:
        feature_matrix.append([
            engager['followers'],
            engager['posts_count'],
            engager['engagement_freq'],
        ])
    model = DBSCAN(eps=0.5, min_samples=3)
    return model.fit_predict(feature_matrix)
# Main Detection Function
def analyze_post(post_url):
    """Run the full bot-detection pipeline against one post.

    Fetches the post's engagements, then layers profile, timing,
    content, and cluster analyses into a single results dict.

    NOTE(review): the top-level 'duplicate_comments' list is never
    populated — duplicates land under content_analysis instead;
    confirm which key downstream consumers read.
    """
    # Data collection would happen here
    engagements = fetch_engagements(post_url)

    results = {
        'suspicious_profiles': [],
        'duplicate_comments': [],
        'time_analysis': {},
        'content_analysis': {},
    }
    if not engagements:
        return results

    # Profile analysis: generic-looking names or very young accounts.
    suspicious = []
    for engager in engagements:
        if is_generic_name(engager['username']) or is_new_account(engager['created_at']):
            suspicious.append(engager)
    results['suspicious_profiles'] = suspicious

    # Timing analysis: regular, rapid-fire intervals suggest automation.
    timestamps = [e['timestamp'] for e in engagements if e['timestamp']]
    if timestamps:
        mean_interval, std_dev = analyze_timing(timestamps)
        results['time_analysis']['mean_interval'] = mean_interval
        results['time_analysis']['std_dev'] = std_dev

    # Content analysis: boilerplate phrases and copy-pasted comments.
    comments = [e['comment'] for e in engagements if e['comment']]
    results['content_analysis']['generic_comments'] = detect_generic_text(comments)
    results['content_analysis']['duplicate_comments'] = find_duplicates(comments)

    # Cluster analysis: dense groups of near-identical engager profiles.
    results['cluster_analysis'] = cluster_engagers(engagements).tolist()

    return results