MatteoScript's picture
Upload 6 files
7687049 verified
raw
history blame contribute delete
949 Bytes
from __future__ import annotations
from pathlib import Path
from typing import Tuple
import pandas as pd
def _parse_ts(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with a parsed ``_ts`` timestamp column.

    When the Google Forms timestamp column ("Informazioni cronologiche") is
    present, its values are converted with ``pd.to_datetime`` (values that
    cannot be parsed become ``NaT``) and the rows are sorted ascending by
    ``_ts``. When the column is absent, ``_ts`` is filled with ``NaT`` and
    the row order is left unchanged. The input frame is never modified.
    """
    ts_source = "Informazioni cronologiche"  # Google Forms export column name
    out = df.copy()

    if ts_source not in out.columns:
        out["_ts"] = pd.NaT
        return out

    # NOTE(review): if this column ever arrives as day-first text instead of
    # real Excel datetimes, ``dayfirst=True`` may be needed -- confirm against
    # actual exports before changing.
    out["_ts"] = pd.to_datetime(out[ts_source], errors="coerce")

    # Use a stable sort so rows sharing the same timestamp keep their original
    # relative order; the default quicksort may shuffle ties, which would make
    # any later "keep the last row" selection non-deterministic.
    return out.sort_values("_ts", kind="mergesort")
def load_and_dedup(excel_path: Path) -> pd.DataFrame:
    """Read the first sheet of an Excel file and keep one row per person.

    The sheet is passed through ``_parse_ts``. If a "Nome e cognome" column
    exists, only the last row for each distinct value of that column (in the
    order returned by ``_parse_ts``) is kept. The result always has a fresh
    0..n-1 index.
    """
    name_col = "Nome e cognome"
    frame = _parse_ts(pd.read_excel(excel_path, sheet_name=0))

    # Without a name column there is nothing to deduplicate on.
    if name_col not in frame.columns:
        return frame.reset_index(drop=True)

    latest_per_name = frame.drop_duplicates(subset=[name_col], keep="last")
    return latest_per_name.reset_index(drop=True)
def load_pair(auto_path: Path, valut_path: Path) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load and deduplicate two Excel files, returned as ``(auto, valut)``.

    Each path is processed independently with ``load_and_dedup``;
    ``auto_path`` is loaded first.
    """
    # NOTE(review): "auto"/"valut" presumably mean self-assessment and
    # evaluation form exports -- confirm with the caller.
    return load_and_dedup(auto_path), load_and_dedup(valut_path)