Spaces:
Sleeping
Sleeping
ss ok
Browse files
app.py
CHANGED
|
@@ -93,38 +93,37 @@ def create_dataframe(protein_sequence, annotations):
|
|
| 93 |
|
| 94 |
# Map UniProt feature types to our column names
|
| 95 |
feature_mapping = {
|
| 96 |
-
'helix': 'Secondary structure',
|
| 97 |
'strand': 'Secondary structure',
|
|
|
|
| 98 |
'turn': 'Secondary structure',
|
| 99 |
'domain': 'Domain',
|
| 100 |
-
'region': ['Pfam domain', 'Disorder'],
|
| 101 |
'disulfide bond': 'Disulfide bridges',
|
| 102 |
'glycosylation site': 'Glycosylation sites',
|
| 103 |
'modified residue': 'modified',
|
| 104 |
'active site': 'active sites',
|
| 105 |
-
'binding site': ['metal binding sites', 'DNA binding sites', 'RNA binding sites', 'ligand binding sites'],
|
| 106 |
-
'site': 'Phosphorylation sites'
|
| 107 |
}
|
| 108 |
|
| 109 |
for feature_type, values in annotations.items():
|
|
|
|
| 110 |
for start, end, description in values:
|
| 111 |
-
feature_type_lower = feature_type.lower()
|
| 112 |
-
|
| 113 |
# Get the corresponding column(s)
|
| 114 |
-
column = feature_mapping.get(feature_type_lower)
|
| 115 |
if not column:
|
| 116 |
continue
|
| 117 |
|
| 118 |
# Handle cases where one feature type maps to multiple possible columns
|
| 119 |
if isinstance(column, list):
|
| 120 |
-
if feature_type_lower == 'region':
|
| 121 |
if 'Pfam' in description:
|
| 122 |
column = 'Pfam domain'
|
| 123 |
elif 'disorder' in description.lower():
|
| 124 |
column = 'Disorder'
|
| 125 |
else:
|
| 126 |
continue
|
| 127 |
-
elif feature_type_lower == 'binding site':
|
| 128 |
if 'metal' in description.lower():
|
| 129 |
column = 'metal binding sites'
|
| 130 |
elif 'DNA' in description:
|
|
@@ -137,11 +136,14 @@ def create_dataframe(protein_sequence, annotations):
|
|
| 137 |
# Fill in the annotation
|
| 138 |
for i in range(start - 1, end):
|
| 139 |
if i < len(df):
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
df.loc[i, column] = f"{current_value}; {description}"
|
| 143 |
else:
|
| 144 |
-
df.loc[i, column] = description
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
return df
|
| 147 |
|
|
|
|
| 93 |
|
| 94 |
# Map UniProt feature types to our column names
|
| 95 |
feature_mapping = {
|
|
|
|
| 96 |
'strand': 'Secondary structure',
|
| 97 |
+
'helix': 'Secondary structure',
|
| 98 |
'turn': 'Secondary structure',
|
| 99 |
'domain': 'Domain',
|
| 100 |
+
'region': ['Pfam domain', 'Disorder'],
|
| 101 |
'disulfide bond': 'Disulfide bridges',
|
| 102 |
'glycosylation site': 'Glycosylation sites',
|
| 103 |
'modified residue': 'modified',
|
| 104 |
'active site': 'active sites',
|
| 105 |
+
'binding site': ['metal binding sites', 'DNA binding sites', 'RNA binding sites', 'ligand binding sites'],
|
| 106 |
+
'site': 'Phosphorylation sites'
|
| 107 |
}
|
| 108 |
|
| 109 |
for feature_type, values in annotations.items():
|
| 110 |
+
feature_type = feature_type.lower() # Convert to lowercase for matching
|
| 111 |
for start, end, description in values:
|
|
|
|
|
|
|
| 112 |
# Get the corresponding column(s)
|
| 113 |
+
column = feature_mapping.get(feature_type)
|
| 114 |
if not column:
|
| 115 |
continue
|
| 116 |
|
| 117 |
# Handle cases where one feature type maps to multiple possible columns
|
| 118 |
if isinstance(column, list):
|
| 119 |
+
if feature_type == 'region':
|
| 120 |
if 'Pfam' in description:
|
| 121 |
column = 'Pfam domain'
|
| 122 |
elif 'disorder' in description.lower():
|
| 123 |
column = 'Disorder'
|
| 124 |
else:
|
| 125 |
continue
|
| 126 |
+
elif feature_type == 'binding site':
|
| 127 |
if 'metal' in description.lower():
|
| 128 |
column = 'metal binding sites'
|
| 129 |
elif 'DNA' in description:
|
|
|
|
| 136 |
# Fill in the annotation
|
| 137 |
for i in range(start - 1, end):
|
| 138 |
if i < len(df):
|
| 139 |
+
if column == 'Secondary structure':
|
| 140 |
+
df.loc[i, column] = feature_type.upper() # Use uppercase for secondary structure
|
|
|
|
| 141 |
else:
|
| 142 |
+
current_value = df.loc[i, column]
|
| 143 |
+
if current_value:
|
| 144 |
+
df.loc[i, column] = f"{current_value}; {description}"
|
| 145 |
+
else:
|
| 146 |
+
df.loc[i, column] = description
|
| 147 |
|
| 148 |
return df
|
| 149 |
|