Spaces:
Sleeping
Sleeping
| import time | |
| import pandas as pd | |
| import preprocessor | |
| import random | |
def generate_large_chat(lines=10000, *, seed=None):
    """Generate a synthetic WhatsApp-style chat log.

    Each line has the form ``M/D/23, H:MM AM|PM - Sender: message`` where the
    date, time, sender and message are all picked at random.

    Args:
        lines: Number of chat lines to produce. Zero or a negative value
            yields an empty string.
        seed: Optional seed for a private random generator. When given, the
            same ``lines``/``seed`` pair always produces the same log, which
            keeps repeated profiling runs comparable. When ``None`` (the
            default) the module-level ``random`` state is used.

    Returns:
        The chat lines joined with newline characters (no trailing newline).
    """
    # A private generator keeps a seeded run from disturbing the global
    # random state; the module itself exposes the same randint/choice API.
    rng = random if seed is None else random.Random(seed)

    senders = ["User1", "User2", "User3"]
    messages = [
        "Hello there, how are you?",
        "I am doing great, thanks for asking! Project update?",
        "This is a test message to simulate a long chat about artificial intelligence.",
        "Meeting is at 10 AM tomorrow to discuss the roadmap.",
        "Check out this link: https://example.com",
        "Haha that is funny 😂",
        "Je parle un peu français aussi. C'est la vie.",
        "Non, je ne crois pas. Il fait beau aujourd'hui.",
        "Ok, see you later. Don't forget the deadline.",
        "Python is a great programming language for data science.",
        "Streamlit makes building apps very easy and fast.",
    ]

    def make_line():
        # Draw order (month, day, hour, minute, AM/PM, sender, message) is
        # fixed so unseeded output under an external random.seed() call
        # stays the same as before.
        date = f"{rng.randint(1, 12)}/{rng.randint(1, 28)}/23"
        hour = rng.randint(1, 12)
        # Minutes start at 10 so the value is always two digits without
        # needing zero padding.
        minute = rng.randint(10, 59)
        ampm = rng.choice(["AM", "PM"])
        time_str = f"{hour}:{minute} {ampm}"
        sender = rng.choice(senders)
        message = rng.choice(messages)
        return f"{date}, {time_str} - {sender}: {message}"

    return "\n".join(make_line() for _ in range(lines))
def profile_preprocessing(lines=10000):
    """Time the two preprocessing stages on a synthetic chat log.

    Generates ``lines`` synthetic chat lines, runs
    ``preprocessor.parse_data`` followed by
    ``preprocessor.analyze_sentiment_and_topics``, and prints the duration of
    each stage plus the overall total. An ``Exception`` raised by either
    stage is printed together with its traceback instead of propagating.

    Args:
        lines: Number of synthetic chat lines to generate.
    """
    print(f"Generating synthetic data ({lines:,} lines)...")
    raw_data = generate_large_chat(lines)
    # len(str) counts characters; the log contains multi-byte characters
    # (emoji, accented letters), so measure the UTF-8 encoding to report MB.
    size_mb = len(raw_data.encode("utf-8")) / 1024 / 1024
    print(f"Data size: {size_mb:.2f} MB")

    print("\nStarting profiling...")
    # perf_counter() is monotonic and high-resolution, so measured intervals
    # are not affected by system clock adjustments the way time.time() is.
    start_total = time.perf_counter()

    # The stages are timed from the outside; finer-grained numbers would
    # require instrumenting preprocessor.py itself.
    try:
        # Step 1: Parse
        parse_start = time.perf_counter()
        df = preprocessor.parse_data(raw_data)
        print(f"Parsing took: {time.perf_counter() - parse_start:.2f}s")

        # Step 2: Analyze
        analyze_start = time.perf_counter()
        df, _topics = preprocessor.analyze_sentiment_and_topics(df)
        print(f"Analysis took: {time.perf_counter() - analyze_start:.2f}s")

        end_total = time.perf_counter()
        print(f"\nTotal Preprocessing Time: {end_total - start_total:.2f} seconds")
        print(f"Messages processed: {len(df)}")
    except Exception as e:
        # Script-level boundary: report the failure with full context rather
        # than crashing the profiling run.
        print(f"Error: {e}")
        import traceback

        traceback.print_exc()
# Run the profiling pass only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    profile_preprocessing()