File size: 800 Bytes
eab2256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from __future__ import annotations

import pandas as pd

from twitter_sentiment.config import load_config, Config
from twitter_sentiment.preprocessing import clean_text, preprocess_dataframe


def test_clean_text_removes_urls_mentions(tmp_path):
    """Check that cleaned text keeps no URL scheme or mention marker.

    A sample string containing a link and an ``@user`` handle is passed
    through ``clean_text`` with the loaded configuration; the output must
    contain neither ``"http"`` nor ``"@"``.
    """
    raw_tweet = "Check this https://example.com @user"
    cleaned = clean_text(raw_tweet, load_config())
    # Same checks, same order: first the URL scheme, then the mention sign.
    for forbidden in ("http", "@"):
        assert forbidden not in cleaned


def test_preprocess_dataframe_returns_expected_columns(tmp_path):
    """Check the shape and cleaned text of a preprocessed one-row frame.

    Builds a single-row DataFrame keyed by the configured text and target
    column names, runs ``preprocess_dataframe`` on it, and expects a
    ``(1, 2)`` result whose text cell equals ``"great flight"``.
    """
    cfg = load_config()
    # Column names come from the config rather than being hard-coded here.
    text_col = cfg.data["text_column"]
    target_col = cfg.data["target_column"]

    frame = pd.DataFrame({text_col: ["Great flight!"], target_col: ["positive"]})
    result = preprocess_dataframe(frame, cfg)

    assert result.shape == (1, 2)
    first_text = result[text_col].iloc[0]
    assert first_text == "great flight"