File size: 2,730 Bytes
2e164d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import pandas as pd
import re
from typing import Any

# Precompiled pattern for strings made up only of non-alphanumeric characters.
# ``\W`` is Unicode-aware for ``str`` patterns, so letters and digits from any
# script count as alphanumeric (an ASCII-only class would misclassify e.g.
# Cyrillic or CJK text as "special"). ``_`` is listed explicitly because ``\W``
# treats the underscore as a word character.
SPECIAL_ONLY_REGEX = re.compile(r'^[\W_]+$')


def is_numeric_or_special(s: Any) -> bool:
    """
    Check if the provided value is numeric or consists solely of special characters.

    The value is converted with ``str()`` and stripped of surrounding
    whitespace before being tested.

    Parameters:
        s (Any): The input value to check.

    Returns:
        bool: True if the value is numeric or special-only, False otherwise.
            Null values (as reported by ``pd.isnull``) and empty or
            whitespace-only strings return False.

    Notes:
        "Numeric" means "accepted by ``float()``", which also covers
        exponent notation ("1e5") and the spellings "nan", "inf" and
        "infinity" in any letter case.
    """
    if pd.isnull(s):
        return False

    text = str(s).strip()

    # Numeric check: anything float() can parse counts as numeric.
    try:
        float(text)
    except ValueError:
        pass
    else:
        return True

    # Special-only check: at least one character and none of them alphanumeric.
    return SPECIAL_ONLY_REGEX.match(text) is not None

def remove_numeric_or_special_responses(df: pd.DataFrame, target_col: str) -> pd.DataFrame:
    """
    Remove rows from the DataFrame where the target column's value is either numeric or
    consists solely of special characters.

    Each value of ``target_col`` is classified with ``is_numeric_or_special``;
    rows classified True are dropped. The input DataFrame is not modified.

    Parameters:
        df (pd.DataFrame): The input DataFrame.
        target_col (str): The name of the column to filter.

    Returns:
        pd.DataFrame: A new DataFrame with the undesired responses removed and
            the index reset to 0..n-1 (the old index is discarded).

    Raises:
        KeyError: If ``target_col`` is not a column of ``df``.
    """
    # Cast to bool explicitly so the mask is a real boolean Series even when the
    # column is empty (mapping an empty column leaves no values to infer a
    # boolean dtype from, and a non-boolean key would not be applied as a row mask).
    is_unwanted = df[target_col].map(is_numeric_or_special).astype(bool)
    return df.loc[~is_unwanted].reset_index(drop=True)


#####################
# DATE CONVERT
#####################

import pandas as pd
import datetime
from dateutil import parser

def robust_convert_date(date_series):
    """
    Convert a pandas Series containing dates in various formats to datetime objects.

    Each element is converted independently:
    1. Null values become pd.NaT.
    2. Values that are already pd.Timestamp / datetime.datetime are returned unchanged.
    3. Otherwise pd.to_datetime() is tried with errors='coerce' and dayfirst=True.
    4. If that yields NaT, dateutil.parser.parse (dayfirst=True) is tried on str(value).

    Parameters:
        date_series (pd.Series): A pandas Series with date values (as strings, numbers, etc.)
            Note that plain numbers are handed to pd.to_datetime, which by default
            interprets them as an offset from the Unix epoch in nanoseconds.

    Returns:
        pd.Series: A Series of datetime objects (or pd.NaT if conversion fails)
    """
    def convert_single(x):
        # Missing values map straight to NaT.
        if pd.isnull(x):
            return pd.NaT
        # If the value is already a datetime, just return it.
        if isinstance(x, (pd.Timestamp, datetime.datetime)):
            return x
        # First, try pd.to_datetime with coercion. The deprecated
        # ``infer_datetime_format`` flag is intentionally not passed: it has no
        # effect on pandas >= 2.0 and only triggers a warning for every element.
        dt = pd.to_datetime(x, errors='coerce', dayfirst=True)
        if pd.notnull(dt):
            return dt
        # Fallback: use dateutil.parser to attempt parsing.
        try:
            return parser.parse(str(x), dayfirst=True)
        except Exception:
            # Best-effort conversion: any parsing failure means "not a date".
            return pd.NaT

    return date_series.apply(convert_single)