# Spaces:
# Build error
# Build error
# File size: 3,740 Bytes
# 1ac9f32
import pytest
import pandas as pd
import os
import json
import tempfile
from core.parsers import Parsers
def test_whatsapp_early_exit():
    """Verify WhatsApp parser stops at 50,001 messages.

    Writes 50,010 synthetic chat lines to a temporary ``.txt`` file, passes
    the file's raw bytes to ``Parsers.parse_whatsapp`` and asserts that the
    returned object has length 50,001. The temporary file is removed whether
    or not the assertion passes.
    """
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.txt', delete=False) as tmp:
        # Record the path first so it is known even if a write fails.
        tmp_path = tmp.name
        tmp.writelines(
            f"01/01/2023, 10:00 - User: Message {i}\n" for i in range(50010)
        )
    try:
        # Read through a context manager so the handle is closed before the
        # file is deleted (an open handle blocks removal on some platforms).
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_whatsapp(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def test_telegram_html_early_exit():
    """Verify Telegram HTML parser stops at 50,001 messages.

    Builds an HTML document containing 50,010 ``<div class="message ">``
    entries in a temporary ``.html`` file, passes the file's raw bytes to
    ``Parsers.parse_telegram`` and asserts that the returned object has
    length 50,001. The temporary file is removed whether or not the
    assertion passes.
    """
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.html', delete=False) as tmp:
        # Record the path first so it is known even if a write fails.
        tmp_path = tmp.name
        tmp.write("<html><body>")
        # NOTE(review): the seconds field ``i % 60`` is not zero-padded
        # (e.g. "10:00:5") -- presumably the parser tolerates this; confirm.
        tmp.writelines(
            f'<div class="message "><div class="pull_right date details" title="01.01.2023 10:00:{i % 60}">10:00</div><div class="from_name">User</div><div class="text">Message {i}</div></div>'
            for i in range(50010)
        )
        tmp.write("</body></html>")
    try:
        # Read through a context manager so the handle is closed before the
        # file is deleted (an open handle blocks removal on some platforms).
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_telegram(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def test_instagram_json_early_exit():
    """Verify Instagram JSON parser stops at 50,001 messages.

    Serialises a dict with a two-entry ``participants`` list and 50,010
    ``messages`` (each with ``sender_name``, ``content`` and an increasing
    ``timestamp_ms``) to a temporary ``.json`` file, passes the file's raw
    bytes to ``Parsers.parse_json`` and asserts that the returned object has
    length 50,001. The temporary file is removed whether or not the
    assertion passes.
    """
    messages = [
        {"sender_name": "User", "content": f"Msg {i}", "timestamp_ms": 1672531200000 + i}
        for i in range(50010)
    ]
    data = {
        "participants": [{"name": "User"}, {"name": "Partner"}],
        "messages": messages,
    }
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp:
        # Record the path first so it is known even if serialisation fails.
        tmp_path = tmp.name
        json.dump(data, tmp)
    try:
        # Read through a context manager so the handle is closed before the
        # file is deleted (an open handle blocks removal on some platforms).
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_json(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def test_discord_native_json_early_exit():
    """Verify Discord Native JSON parser stops at 50,001 messages.

    Serialises a top-level list of 50,010 objects (each with ``Timestamp``
    and ``Contents`` keys) to a temporary ``.json`` file, passes the file's
    raw bytes to ``Parsers.parse_json`` and asserts that the returned object
    has length 50,001. The temporary file is removed whether or not the
    assertion passes.
    """
    data = [
        {"Timestamp": "2023-01-01T10:00:00", "Contents": f"Msg {i}"}
        for i in range(50010)
    ]
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp:
        # Record the path first so it is known even if serialisation fails.
        tmp_path = tmp.name
        json.dump(data, tmp)
    try:
        # Read through a context manager so the handle is closed before the
        # file is deleted (an open handle blocks removal on some platforms).
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_json(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def test_telegram_json_early_exit():
    """Verify Telegram JSON parser stops at 50,001 messages.

    Serialises a dict with ``type`` set to ``"personal_chat"`` and 50,010
    ``messages`` (each with ``type``, ``from``, ``text`` and ``date`` keys)
    to a temporary ``.json`` file, passes the file's raw bytes to
    ``Parsers.parse_json`` and asserts that the returned object has length
    50,001. The temporary file is removed whether or not the assertion
    passes.
    """
    messages = [
        {"type": "message", "from": "User", "text": f"Msg {i}", "date": "2023-01-01T10:00:00"}
        for i in range(50010)
    ]
    data = {
        "type": "personal_chat",
        "messages": messages,
    }
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp:
        # Record the path first so it is known even if serialisation fails.
        tmp_path = tmp.name
        json.dump(data, tmp)
    try:
        # Read through a context manager so the handle is closed before the
        # file is deleted (an open handle blocks removal on some platforms).
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_json(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def test_discord_exporter_json_early_exit():
    """Verify DiscordChatExporter JSON parser stops at 50,001 messages.

    Serialises a dict with a ``channel`` object and 50,010 ``messages`` (each
    with a nested ``author`` name, ``content`` and ``timestamp``) to a
    temporary ``.json`` file, passes the file's raw bytes to
    ``Parsers.parse_json`` and asserts that the returned object has length
    50,001. The temporary file is removed whether or not the assertion
    passes.
    """
    messages = [
        {"author": {"name": "User"}, "content": f"Msg {i}", "timestamp": "2023-01-01T10:00:00"}
        for i in range(50010)
    ]
    data = {
        "channel": {"name": "general"},
        "messages": messages,
    }
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.json', delete=False) as tmp:
        # Record the path first so it is known even if serialisation fails.
        tmp_path = tmp.name
        json.dump(data, tmp)
    try:
        # Read through a context manager so the handle is closed before the
        # file is deleted (an open handle blocks removal on some platforms).
        with open(tmp_path, "rb") as fh:
            raw = fh.read()
        df = Parsers.parse_json(raw)
        assert len(df) == 50001
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|