File size: 3,740 Bytes
1ac9f32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import pytest
import pandas as pd
import os
import json
import tempfile
from core.parsers import Parsers

def test_whatsapp_early_exit():
    """Verify WhatsApp parser stops at 50,001 messages.

    Writes 50,010 synthetic chat lines to a temporary ``.txt`` file, passes
    the file's raw bytes to ``Parsers.parse_whatsapp`` and asserts that the
    parsed result has a length of exactly 50,001.
    """
    # delete=False: the file is reopened by name after this handle is closed,
    # and removed explicitly in the ``finally`` clause below.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.txt', delete=False) as tmp:
        for i in range(50010):
            tmp.write(f"01/01/2023, 10:00 - User: Message {i}\n")
        tmp_path = tmp.name

    try:
        # Read inside a context manager so the handle is closed
        # deterministically before the file is removed, instead of relying on
        # garbage collection to close it.
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_whatsapp(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def test_telegram_html_early_exit():
    """Verify Telegram HTML parser stops at 50,001 messages.

    Writes an HTML document containing 50,010 ``<div class="message ">``
    entries to a temporary ``.html`` file, passes the file's raw bytes to
    ``Parsers.parse_telegram`` and asserts that the parsed result has a
    length of exactly 50,001.
    """
    # delete=False: the file is reopened by name after this handle is closed,
    # and removed explicitly in the ``finally`` clause below.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.html', delete=False) as tmp:
        tmp.write("<html><body>")
        for i in range(50010):
            # The seconds field cycles through 0-59 via ``i % 60``.
            tmp.write(f'<div class="message "><div class="pull_right date details" title="01.01.2023 10:00:{i % 60}">10:00</div><div class="from_name">User</div><div class="text">Message {i}</div></div>')
        tmp.write("</body></html>")
        tmp_path = tmp.name

    try:
        # Read inside a context manager so the handle is closed
        # deterministically before the file is removed, instead of relying on
        # garbage collection to close it.
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_telegram(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def test_instagram_json_early_exit():
    """Verify Instagram JSON parser stops at 50,001 messages.

    Builds a payload with a ``participants`` list and 50,010 ``messages``
    (each carrying ``sender_name``, ``content`` and ``timestamp_ms``), dumps
    it to a temporary ``.json`` file, passes the file's raw bytes to
    ``Parsers.parse_json`` and asserts that the parsed result has a length of
    exactly 50,001.
    """
    data = {
        "participants": [{"name": "User"}, {"name": "Partner"}],
        "messages": [{"sender_name": "User", "content": f"Msg {i}", "timestamp_ms": 1672531200000 + i} for i in range(50010)]
    }
    # delete=False: the file is reopened by name after this handle is closed,
    # and removed explicitly in the ``finally`` clause below.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp:
        json.dump(data, tmp)
        tmp_path = tmp.name

    try:
        # Read inside a context manager so the handle is closed
        # deterministically before the file is removed, instead of relying on
        # garbage collection to close it.
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_json(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def test_discord_native_json_early_exit():
    """Verify Discord Native JSON parser stops at 50,001 messages.

    Builds a top-level list of 50,010 records (each carrying ``Timestamp``
    and ``Contents``), dumps it to a temporary ``.json`` file, passes the
    file's raw bytes to ``Parsers.parse_json`` and asserts that the parsed
    result has a length of exactly 50,001.
    """
    data = [{"Timestamp": "2023-01-01T10:00:00", "Contents": f"Msg {i}"} for i in range(50010)]
    # delete=False: the file is reopened by name after this handle is closed,
    # and removed explicitly in the ``finally`` clause below.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp:
        json.dump(data, tmp)
        tmp_path = tmp.name

    try:
        # Read inside a context manager so the handle is closed
        # deterministically before the file is removed, instead of relying on
        # garbage collection to close it.
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_json(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def test_telegram_json_early_exit():
    """Verify Telegram JSON parser stops at 50,001 messages.

    Builds a payload with ``type`` set to ``"personal_chat"`` and 50,010
    ``messages`` (each carrying ``type``, ``from``, ``text`` and ``date``),
    dumps it to a temporary ``.json`` file, passes the file's raw bytes to
    ``Parsers.parse_json`` and asserts that the parsed result has a length of
    exactly 50,001.
    """
    data = {
        "type": "personal_chat",
        "messages": [{"type": "message", "from": "User", "text": f"Msg {i}", "date": "2023-01-01T10:00:00"} for i in range(50010)]
    }
    # delete=False: the file is reopened by name after this handle is closed,
    # and removed explicitly in the ``finally`` clause below.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp:
        json.dump(data, tmp)
        tmp_path = tmp.name

    try:
        # Read inside a context manager so the handle is closed
        # deterministically before the file is removed, instead of relying on
        # garbage collection to close it.
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_json(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def test_discord_exporter_json_early_exit():
    """Verify DiscordChatExporter JSON parser stops at 50,001 messages.

    Builds a payload with a ``channel`` object and 50,010 ``messages`` (each
    carrying a nested ``author`` name, ``content`` and ``timestamp``), dumps
    it to a temporary ``.json`` file, passes the file's raw bytes to
    ``Parsers.parse_json`` and asserts that the parsed result has a length of
    exactly 50,001.
    """
    data = {
        "channel": {"name": "general"},
        "messages": [{"author": {"name": "User"}, "content": f"Msg {i}", "timestamp": "2023-01-01T10:00:00"} for i in range(50010)]
    }
    # delete=False: the file is reopened by name after this handle is closed,
    # and removed explicitly in the ``finally`` clause below.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp:
        json.dump(data, tmp)
        tmp_path = tmp.name

    try:
        # Read inside a context manager so the handle is closed
        # deterministically before the file is removed, instead of relying on
        # garbage collection to close it.
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_json(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)