import pandas as pd

# Load the training split; each row carries a target sequence and its sites.
df = pd.read_csv('/scratch/pranamlab/tong/SMILES_BindEvaluator/datasets/train.csv')

# Total number of positions across every target sequence in the split.
targets = df['Target'].tolist()
total_length = sum(map(len, targets))
def parse_sites(x) -> int:
    """Count the site indices listed in one comma-separated cell.

    Example: ``"49,50,51"`` -> ``3``.

    The result is always an ``int`` so that per-row counts can be summed
    directly; missing input counts as zero sites.

    Args:
        x: Raw cell value. ``None``, blank strings, and any value whose
            string form is ``"nan"`` (case-insensitive) are treated as
            "no sites". The list may be wrapped in one pair of matching
            single or double quotes.

    Returns:
        Number of non-empty comma-separated tokens; ``0`` for missing input.

    Raises:
        ValueError: If a non-empty token is not a valid integer literal.
    """
    if x is None:
        return 0
    s = str(x).strip()
    if not s or s.lower() == "nan":
        return 0
    # Drop one pair of matching surrounding quotes, if present.
    if s[0] == s[-1] and s[0] in ('"', "'"):
        s = s[1:-1].strip()
    # int() is kept as validation so malformed tokens fail loudly instead of
    # being silently counted; blank tokens (e.g. a trailing comma) are skipped.
    tokens = (part.strip() for part in s.split(","))
    return len([int(tok) for tok in tokens if tok])
# Count positive (binding) positions over all rows; every remaining
# sequence position is treated as negative (non-binding).
binding_sites = df['Binding Sites'].tolist()
num_binding_sites = sum(map(parse_sites, binding_sites))
num_non_binding_sites = total_length - num_binding_sites

# Each class weight is total / (2 * class_count), so the rarer class
# receives the larger weight.
weight_pos = total_length / (2 * num_binding_sites)
weight_neg = total_length / (2 * num_non_binding_sites)

print(f"Positive Weight: {weight_pos}")
print(f"Negative Weight: {weight_neg}")
Xet Storage Details
- Size:
- 1.02 kB
- Xet hash:
- c4a668b3814bf822c9346fbd4326a1a4a774c720771254774d12a7f62e9425b8
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.