| import pandas as pd |
| from math import sqrt |
|
|
| def summarize_metrics(skip, csv_path: str, save_path: str | None = None) -> pd.DataFrame: |
| """ |
| Compute mean and standard deviation for all columns except the first |
| (assumed non-numeric identifier like 'Peptide Sequence'). |
| |
| Returns a DataFrame with rows = column names and columns = ['mean','std','count']. |
| Uses sample std (ddof=1). Non-numeric cells are coerced to NaN. |
| """ |
| df = pd.read_csv(csv_path) |
| vals = df.iloc[:, skip:].apply(pd.to_numeric, errors='coerce') |
| stats = vals.agg(['mean', 'std', 'count']).T |
| if save_path: |
| stats.to_csv(save_path, index=True) |
| return stats |
|
|
def summarize_list(xs, ddof=1):
    """Return mean, standard deviation and count of the numeric items in ``xs``.

    Each item is converted with ``float()``. Items that cannot be converted
    (``None``, empty or non-numeric strings, unsupported objects) and items
    that convert to NaN are skipped, so missing values do not turn the
    result into NaN. The statistics are accumulated in a single pass with
    Welford's algorithm.

    Args:
        xs: Iterable of candidate values (numbers, numeric strings, ...).
        ddof: Delta degrees of freedom; the variance divisor is
            ``count - ddof``. The default of 1 gives the sample standard
            deviation.

    Returns:
        dict with the keys ``"mean"``, ``"std"`` and ``"count"``, where
        ``count`` is the number of values actually used.

    Raises:
        ValueError: If no numeric value is found, or if the number of
            numeric values does not exceed ``ddof``.
    """
    # Local import: the file only imports ``sqrt`` from math at module level.
    from math import isnan

    mean = 0.0
    m2 = 0.0  # running sum of squared deviations from the current mean
    count = 0
    for x in xs:
        # float() itself rejects None (TypeError) and "" (ValueError), so no
        # separate equality pre-check is needed; that also avoids evaluating
        # ``x == ""`` on objects whose comparison is not a plain bool.
        try:
            v = float(x)
        except (TypeError, ValueError):
            continue
        if isnan(v):
            # A single NaN would otherwise poison both mean and std.
            continue

        # Welford update: numerically stable running mean / squared deviations.
        count += 1
        delta = v - mean
        mean += delta / count
        m2 += delta * (v - mean)

    if count == 0:
        raise ValueError("No numeric values found.")
    if count <= ddof:
        raise ValueError(f"Need at least {ddof + 1} numeric values; got {count}.")

    std = sqrt(m2 / (count - ddof))
    return {"mean": mean, "std": std, "count": count}
|
|
def csv_column_to_list(path: str, column: str, *, dropna: bool = True):
    """Read one named column of a CSV file and return it as a Python list.

    Args:
        path: Path of the CSV file to read.
        column: Header name of the column to extract.
        dropna: When true (the default), missing values are removed from
            the column before it is converted to a list.

    Returns:
        The column's values as a list, in file order.

    Raises:
        KeyError: If ``column`` is not one of the CSV's headers; the message
            lists the headers that are available.
    """
    frame = pd.read_csv(path)

    if column in frame.columns:
        selected = frame[column]
        cleaned = selected.dropna() if dropna else selected
        return cleaned.tolist()

    raise KeyError(f"Column '{column}' not found. Available: {list(frame.columns)}")
|
|
def main(path: str = "", prot_name: str = "") -> None:
    """Summarize one generation-results CSV and print the summary.

    Reads ``<path>/<prot_name>_generation_results.csv``, skips its first
    column, passes ``<path>/results_summary.csv`` to ``summarize_metrics``
    as the save path, and prints the returned DataFrame.

    Args:
        path: Directory holding the results CSV. An empty string means the
            current working directory (a plain ``f"{path}/..."`` join would
            instead point at the filesystem root).
        prot_name: Prefix of the ``*_generation_results.csv`` file name.
    """
    # Local import keeps this entry point self-contained.
    from pathlib import Path

    base_dir = Path(path)
    stats = summarize_metrics(
        skip=1,
        csv_path=str(base_dir / f"{prot_name}_generation_results.csv"),
        save_path=str(base_dir / "results_summary.csv"),
    )

    print(stats)
| |
# Run the summary only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()