Spaces:

hansche
/

SocialMediaFoci

Sleeping

SocialMediaFoci / profile_performance.py

Bismark

Update Space

5ab54b7 5 months ago

2.6 kB

	import time
	import pandas as pd
	import preprocessor
	import random

	def generate_large_chat(lines=10000, seed=None):
	    """Generate a synthetic WhatsApp-style chat log.

	    Every line has the form ``M/D/23, H:MM AM|PM - Sender: message``; the
	    date, time, sender and message are all picked at random.

	    Args:
	        lines: Number of chat lines to produce. Zero or a negative value
	            yields an empty string.
	        seed: Optional seed for reproducible output. When ``None`` (the
	            default) the module-level ``random`` generator is used, exactly
	            as before this parameter existed. When given, a private
	            ``random.Random(seed)`` is used so the global random state is
	            left untouched.

	    Returns:
	        The generated lines joined with ``"\\n"`` (no trailing newline).
	    """
	    senders = ["User1", "User2", "User3"]
	    messages = [
	        "Hello there, how are you?",
	        "I am doing great, thanks for asking! Project update?",
	        "This is a test message to simulate a long chat about artificial intelligence.",
	        "Meeting is at 10 AM tomorrow to discuss the roadmap.",
	        "Check out this link: https://example.com",
	        "Haha that is funny 😂",
	        "Je parle un peu français aussi. C'est la vie.",
	        "Non, je ne crois pas. Il fait beau aujourd'hui.",
	        "Ok, see you later. Don't forget the deadline.",
	        "Python is a great programming language for data science.",
	        "Streamlit makes building apps very easy and fast.",
	    ]

	    # The ``random`` module exposes the same randint/choice API as a
	    # ``random.Random`` instance, so either can serve as the generator.
	    rng = random if seed is None else random.Random(seed)

	    def make_line():
	        """Build one chat line from freshly drawn random fields."""
	        date = f"{rng.randint(1, 12)}/{rng.randint(1, 28)}/23"
	        hour = rng.randint(1, 12)
	        # Minutes start at 10 so the value is always two digits without
	        # needing zero padding.
	        minute = rng.randint(10, 59)
	        ampm = rng.choice(["AM", "PM"])
	        sender = rng.choice(senders)
	        message = rng.choice(messages)
	        return f"{date}, {hour}:{minute} {ampm} - {sender}: {message}"

	    return "\n".join(make_line() for _ in range(lines))

	def profile_preprocessing(lines=10000):
	    """Time the preprocessing pipeline on a synthetic chat log.

	    Generates ``lines`` chat lines with ``generate_large_chat``, then runs
	    ``preprocessor.parse_data`` followed by
	    ``preprocessor.analyze_sentiment_and_topics`` and prints how long each
	    step and the whole run took.

	    Args:
	        lines: Number of synthetic chat lines to generate (default 10000).

	    Returns:
	        None. Results are printed to stdout. Any exception raised by the
	        pipeline is caught, reported with its traceback, and not re-raised.
	    """
	    print(f"Generating synthetic data ({lines:,} lines)...")
	    raw_data = generate_large_chat(lines)
	    # Measure encoded bytes rather than characters: the text contains
	    # non-ASCII characters, so len(str) would under-report the size.
	    size_mb = len(raw_data.encode("utf-8")) / 1024 / 1024
	    print(f"Data size: {size_mb:.2f} MB")

	    print("\nStarting profiling...")
	    # perf_counter() is a monotonic, high-resolution clock intended for
	    # measuring elapsed time; time.time() can jump if the system clock
	    # is adjusted.
	    start_total = time.perf_counter()

	    # The preprocessor functions are timed from the outside only; a
	    # finer-grained breakdown would require instrumenting preprocessor.py.
	    try:
	        # Step 1: Parse
	        parse_start = time.perf_counter()
	        df = preprocessor.parse_data(raw_data)
	        print(f"Parsing took: {time.perf_counter() - parse_start:.2f}s")

	        # Step 2: Analyze
	        analyze_start = time.perf_counter()
	        df, _topics = preprocessor.analyze_sentiment_and_topics(df)
	        print(f"Analysis took: {time.perf_counter() - analyze_start:.2f}s")

	        end_total = time.perf_counter()
	        print(f"\nTotal Preprocessing Time: {end_total - start_total:.2f} seconds")
	        print(f"Messages processed: {len(df)}")
	    except Exception as e:
	        # Top-level boundary of a diagnostic script: report the failure
	        # with full context instead of crashing.
	        print(f"Error: {e}")
	        import traceback
	        traceback.print_exc()

	# Run the profiling routine only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    profile_preprocessing()