import json
import os
import tempfile

import pandas as pd
import pytest

from core.parsers import Parsers

# Every fixture contains slightly more messages than the parsers are expected
# to return, so a parser that does not stop early yields a different row count.
_MESSAGES_WRITTEN = 50010
_EXPECTED_ROWS = 50001


def _round_trip_via_temp_file(text, suffix):
    """Write *text* to a temporary file and return the file's raw bytes.

    Args:
        text: The fixture content to write (text mode).
        suffix: File-name suffix for the temporary file, e.g. ``".txt"``.

    Returns:
        The bytes read back from the file in binary mode.

    The temporary file is removed before returning, including when writing
    or reading raises.
    """
    # delete=False so the file can be reopened by name after it is closed;
    # an explicit encoding keeps the bytes independent of the locale.
    tmp = tempfile.NamedTemporaryFile(
        mode="w+", suffix=suffix, delete=False, encoding="utf-8"
    )
    tmp_path = tmp.name
    try:
        with tmp:
            tmp.write(text)
        # Close the read handle deterministically so the removal below cannot
        # be blocked by a still-open file.
        with open(tmp_path, "rb") as fh:
            return fh.read()
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def test_whatsapp_early_exit():
    """Verify WhatsApp parser stops at 50,001 messages."""
    chat = "".join(
        f"01/01/2023, 10:00 - User: Message {i}\n"
        for i in range(_MESSAGES_WRITTEN)
    )
    df = Parsers.parse_whatsapp(_round_trip_via_temp_file(chat, ".txt"))
    assert len(df) == _EXPECTED_ROWS


def test_telegram_html_early_exit():
    """Verify Telegram HTML parser stops at 50,001 messages."""
    # NOTE(review): as this file currently stands, the opening/closing wrapper
    # strings are empty and the per-message template holds no HTML markup --
    # it looks like tags may have been stripped at some point. Confirm against
    # the markup Parsers.parse_telegram expects and restore it if so.
    header = ""
    footer = ""
    messages = "".join(
        f"\n10:00\nUser\nMessage {i}\n" for i in range(_MESSAGES_WRITTEN)
    )
    page = header + messages + footer
    df = Parsers.parse_telegram(_round_trip_via_temp_file(page, ".html"))
    assert len(df) == _EXPECTED_ROWS


def test_instagram_json_early_exit():
    """Verify Instagram JSON parser stops at 50,001 messages."""
    data = {
        "participants": [{"name": "User"}, {"name": "Partner"}],
        "messages": [
            {
                "sender_name": "User",
                "content": f"Msg {i}",
                "timestamp_ms": 1672531200000 + i,
            }
            for i in range(_MESSAGES_WRITTEN)
        ],
    }
    df = Parsers.parse_json(_round_trip_via_temp_file(json.dumps(data), ".json"))
    assert len(df) == _EXPECTED_ROWS


def test_discord_native_json_early_exit():
    """Verify Discord Native JSON parser stops at 50,001 messages."""
    data = [
        {"Timestamp": "2023-01-01T10:00:00", "Contents": f"Msg {i}"}
        for i in range(_MESSAGES_WRITTEN)
    ]
    df = Parsers.parse_json(_round_trip_via_temp_file(json.dumps(data), ".json"))
    assert len(df) == _EXPECTED_ROWS


def test_telegram_json_early_exit():
    """Verify Telegram JSON parser stops at 50,001 messages."""
    data = {
        "type": "personal_chat",
        "messages": [
            {
                "type": "message",
                "from": "User",
                "text": f"Msg {i}",
                "date": "2023-01-01T10:00:00",
            }
            for i in range(_MESSAGES_WRITTEN)
        ],
    }
    df = Parsers.parse_json(_round_trip_via_temp_file(json.dumps(data), ".json"))
    assert len(df) == _EXPECTED_ROWS


def test_discord_exporter_json_early_exit():
    """Verify DiscordChatExporter JSON parser stops at 50,001 messages."""
    data = {
        "channel": {"name": "general"},
        "messages": [
            {
                "author": {"name": "User"},
                "content": f"Msg {i}",
                "timestamp": "2023-01-01T10:00:00",
            }
            for i in range(_MESSAGES_WRITTEN)
        ],
    }
    df = Parsers.parse_json(_round_trip_via_temp_file(json.dumps(data), ".json"))
    assert len(df) == _EXPECTED_ROWS