AlienChen's picture
download
raw
1.02 kB
import pandas as pd
# Load the training table and add up the length of every entry in the
# 'Target' column; this total is the denominator for the class weights.
df = pd.read_csv('/scratch/pranamlab/tong/SMILES_BindEvaluator/datasets/train.csv')
targets = df['Target'].tolist()
total_length = sum(map(len, targets))
def parse_sites(x):
    """
    Count the site indices listed in a comma-separated cell.

    "49,50,51" -> 3
    Empty, None, and NaN cells count as 0, so the results of several calls
    can be summed directly.

    Args:
        x: Raw cell value. It is converted with str() before parsing, and one
            pair of matching surrounding quotes (single or double) is removed.

    Returns:
        int: Number of non-empty comma-separated tokens in the cell.

    Raises:
        ValueError: If a non-empty token is not a valid integer.
    """
    if x is None:
        return 0
    s = str(x).strip()
    if s == "" or s.lower() == "nan":
        return 0
    # remove possible quotes
    if (s.startswith('"') and s.endswith('"')) or (s.startswith("'") and s.endswith("'")):
        s = s[1:-1].strip()
    # An empty string splits to [""], which is filtered out below and gives 0.
    # int() is applied only to validate each token; just the count is returned.
    return len([int(t.strip()) for t in s.split(",") if t.strip() != ""])
# Tally the binding-site positions over the whole table. Every row is parsed
# before the sum is taken.
binding_sites = df['Binding Sites'].tolist()
num_binding_sites = sum(list(map(parse_sites, binding_sites)))
num_non_binding_sites = total_length - num_binding_sites

# Each class weight is total / (2 * class_count).
weight_pos = total_length / (2 * num_binding_sites)
weight_neg = total_length / (2 * num_non_binding_sites)

print(f"Positive Weight: {weight_pos}")
print(f"Negative Weight: {weight_neg}")

Xet Storage Details

Size:
1.02 kB
·
Xet hash:
c4a668b3814bf822c9346fbd4326a1a4a774c720771254774d12a7f62e9425b8

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.