Spaces:

SURESHBEEKHANI
/

WhatsApp-Conversations-Analysis

Runtime error

App Files Files Community

WhatsApp-Conversations-Analysis / src /helper.py

SURESHBEEKHANI

Upload 11 files

7089d06 verified about 1 year ago

raw

history blame contribute delete

5.89 kB

	import matplotlib.pyplot as plt
	from urlextract import URLExtract
	from wordcloud import WordCloud
	import pandas as pd
	from collections import Counter
	import emoji

	from textblob import TextBlob

	# Shared URLExtract instance: built once when the module is imported and
	# reused by fetch_stats to count the links found in each message.
	url_extractor = URLExtract()

	def fetch_stats(df: pd.DataFrame, selected_user: str):
	    """
	    Summarise a chat as message, word, media-placeholder and link counts.

	    The frame is restricted to rows whose 'User' equals ``selected_user``
	    unless ``selected_user`` is the literal 'Overall', in which case every
	    row is used.

	    Returns:
	        A 4-tuple ``(num_messages, num_words, num_media_messages, num_links)``.
	    """
	    def _count_words(text):
	        # Whitespace-separated tokens in a single message.
	        return len(text.split())

	    def _count_links(text):
	        # URLs detected in a single message by the module-level extractor.
	        return len(url_extractor.find_urls(text))

	    if selected_user == 'Overall':
	        subset = df
	    else:
	        subset = df[df['User'] == selected_user]

	    messages = subset['Message']

	    message_total = subset.shape[0]
	    word_total = messages.apply(_count_words).sum()
	    # WhatsApp exports replace attachments with this exact placeholder text.
	    media_total = subset[messages == '<Media omitted>'].shape[0]
	    link_total = messages.apply(_count_links).sum()

	    return message_total, word_total, media_total, link_total

	def top_active_users(df: pd.DataFrame, top_n: int = 5):
	    """
	    Rank users by how many messages they sent.

	    Returns:
	        A pair ``(top_users, user_percentage)``: the ``top_n`` highest
	        message counts as a Series indexed by user, and a DataFrame with
	        columns 'User' and 'Percentage' giving every user's share of all
	        messages, rounded to two decimals.
	    """
	    authors = df['User']

	    leaders = authors.value_counts().head(top_n)

	    shares = authors.value_counts(normalize=True) * 100
	    share_table = shares.round(2).reset_index()
	    share_table.columns = ['User', 'Percentage']

	    return leaders, share_table

	def create_wordcloud(selected_user: str, df: pd.DataFrame):
	    """
	    Build a word cloud from chat messages with stop words removed.

	    Args:
	        selected_user: User whose messages are used, or 'Overall' to use
	            every user's messages.
	        df: Chat frame providing 'User' and 'Message' columns.

	    Returns:
	        The result of ``WordCloud.generate`` on the cleaned, space-joined
	        message text.

	    Stop words are read from 'stop_hinglish.txt', resolved relative to the
	    current working directory. Rows authored by 'group_notification' and
	    '<Media omitted>' placeholders are excluded.
	    """
	    with open('stop_hinglish.txt', 'r') as file:
	        stop_words = set(file.read().split())

	    if selected_user != 'Overall':
	        df = df[df['User'] == selected_user]

	    keep = (df['User'] != 'group_notification') & (df['Message'] != '<Media omitted>')

	    # Derive the cleaned text as its own Series instead of assigning back
	    # into a column of a filtered slice: that chained assignment triggers
	    # pandas' SettingWithCopyWarning and may write to a view of the input.
	    cleaned = df.loc[keep, 'Message'].apply(
	        lambda msg: " ".join(word for word in msg.lower().split() if word not in stop_words)
	    )

	    wc = WordCloud(width=500, height=500, min_font_size=10, background_color='white')
	    wordcloud = wc.generate(cleaned.str.cat(sep=" "))

	    return wordcloud

	def most_common_word(selected_user: str, df: pd.DataFrame):
	    """
	    Tabulate the 20 most frequent non-stop-words in the chat.

	    Messages are limited to ``selected_user`` unless it is 'Overall'.
	    Rows from 'group_notification' and '<Media omitted>' placeholders are
	    skipped, words are lower-cased, and anything listed in
	    'stop_hinglish.txt' (read from the working directory) is ignored.

	    Returns:
	        A DataFrame built from ``(word, count)`` pairs, most frequent first.
	    """
	    with open('stop_hinglish.txt', 'r') as handle:
	        ignored = set(handle.read().split())

	    scoped = df if selected_user == 'Overall' else df[df['User'] == selected_user]

	    is_notification = scoped['User'] == 'group_notification'
	    is_media = scoped['Message'] == '<Media omitted>'
	    texts = scoped[~(is_notification | is_media)]['Message']

	    tally = Counter()
	    for text in texts:
	        for token in text.lower().split():
	            if token not in ignored:
	                tally[token] += 1

	    return pd.DataFrame(tally.most_common(20))

	def emojis_analysis(selected_user: str, df: pd.DataFrame) -> pd.DataFrame:
	    """
	    Count the emoji characters used in messages.

	    Messages are limited to ``selected_user`` unless it is 'Overall';
	    missing messages are dropped. Each message is scanned one character
	    at a time and a character is counted when it is a key of
	    ``emoji.EMOJI_DATA``.

	    Returns:
	        A DataFrame built from ``(emoji, count)`` pairs ordered from most
	        to least frequent; empty when no emoji is found.
	    """
	    scoped = df if selected_user == 'Overall' else df[df['User'] == selected_user]

	    tally = Counter()
	    for text in scoped['Message'].dropna():
	        tally.update(symbol for symbol in text if symbol in emoji.EMOJI_DATA)

	    return pd.DataFrame(tally.most_common())

	def monthly_timeline(selected_user: str, df: pd.DataFrame):
	    """
	    Count messages per calendar month.

	    Messages are limited to ``selected_user`` unless it is 'Overall'.

	    Returns:
	        A DataFrame with one row per ('Year', 'month_num', 'Month') group,
	        a 'Message' column holding the message count, and a 'time' label
	        of the form ``"<Month>-<Year>"``.
	    """
	    def _label(row):
	        # Human-readable axis label, e.g. "January-2023".
	        return f"{row['Month']}-{row['Year']}"

	    scoped = df if selected_user == 'Overall' else df[df['User'] == selected_user]

	    grouping_keys = ['Year', 'month_num', 'Month']
	    per_month = scoped.groupby(grouping_keys)['Message'].count().reset_index()
	    per_month['time'] = per_month.apply(_label, axis=1)

	    return per_month

	def daily_timeline(selected_user: str, df: pd.DataFrame):
	    """
	    Count messages per calendar day.

	    Messages are limited to ``selected_user`` unless it is 'Overall'.

	    Returns:
	        A DataFrame with an 'only_date' column and a 'Message' column
	        holding the number of messages on that date.
	    """
	    scoped = df if selected_user == 'Overall' else df[df['User'] == selected_user]

	    per_day = scoped.groupby('only_date')['Message'].count()

	    return per_day.reset_index()

	def week_activity_map(selected_user: str, df: pd.DataFrame):
	    """
	    Count messages per weekday name.

	    Messages are limited to ``selected_user`` unless it is 'Overall'.

	    Returns:
	        The ``value_counts`` Series of the 'day_name' column.
	    """
	    if selected_user == 'Overall':
	        weekdays = df['day_name']
	    else:
	        weekdays = df.loc[df['User'] == selected_user, 'day_name']

	    return weekdays.value_counts()

	def month_activity_map(selected_user: str, df: pd.DataFrame):
	    """
	    Count messages per month name.

	    Messages are limited to ``selected_user`` unless it is 'Overall'.

	    Returns:
	        The ``value_counts`` Series of the 'Month' column.
	    """
	    if selected_user == 'Overall':
	        months = df['Month']
	    else:
	        months = df.loc[df['User'] == selected_user, 'Month']

	    return months.value_counts()

	def activity_heatmap(selected_user: str, df: pd.DataFrame):
	    """
	    Cross-tabulate message counts by weekday and period.

	    Messages are limited to ``selected_user`` unless it is 'Overall'.

	    Returns:
	        A DataFrame indexed by 'day_name' with one column per 'period'
	        value; cells hold message counts, with 0 where a combination
	        never occurs.
	    """
	    scoped = df if selected_user == 'Overall' else df[df['User'] == selected_user]

	    counts = scoped.pivot_table(
	        index='day_name',
	        columns='period',
	        values='Message',
	        aggfunc='count',
	    )

	    # Day/period combinations with no messages come back as NaN.
	    return counts.fillna(0)

	# Function to perform sentiment analysis
	def perform_sentiment_analysis(data, selected_user):
	    """
	    Score each message with TextBlob polarity and bucket it by sentiment.

	    Args:
	        data: Chat frame providing 'User' and 'Message' columns. It is not
	            modified; the annotated result is a separate copy.
	        selected_user: User whose messages are analysed, or "Overall" to
	            analyse every row.

	    Returns:
	        A pair ``(sentiment_counts, data)``: the ``value_counts`` of the
	        'Sentiment' column, and a copy of the (optionally filtered) frame
	        with 'Polarity' and 'Sentiment' columns added.
	    """
	    if selected_user != "Overall":
	        data = data[data["User"] == selected_user]

	    # Work on a copy: without it the "Overall" path adds columns to the
	    # caller's frame in place, and the filtered path assigns into a slice
	    # (pandas SettingWithCopyWarning).
	    data = data.copy()

	    # Polarity above 0.1 is Positive, below -0.1 is Negative, and anything
	    # in between (inclusive) is Neutral.
	    data["Polarity"] = data["Message"].apply(lambda x: TextBlob(x).sentiment.polarity)
	    data["Sentiment"] = data["Polarity"].apply(
	        lambda x: "Positive" if x > 0.1 else ("Negative" if x < -0.1 else "Neutral")
	    )

	    # Aggregate sentiment counts
	    sentiment_counts = data["Sentiment"].value_counts()

	    return sentiment_counts, data