tonigi committed on
Commit
548faf1
·
1 Parent(s): 489bd70
Files changed (1) hide show
  1. app.py +15 -13
app.py CHANGED
@@ -93,38 +93,37 @@ def create_dataframe(protein_sequence, annotations):
93
 
94
  # Map UniProt feature types to our column names
95
  feature_mapping = {
96
- 'helix': 'Secondary structure',
97
  'strand': 'Secondary structure',
 
98
  'turn': 'Secondary structure',
99
  'domain': 'Domain',
100
- 'region': ['Pfam domain', 'Disorder'], # Will check description
101
  'disulfide bond': 'Disulfide bridges',
102
  'glycosylation site': 'Glycosylation sites',
103
  'modified residue': 'modified',
104
  'active site': 'active sites',
105
- 'binding site': ['metal binding sites', 'DNA binding sites', 'RNA binding sites', 'ligand binding sites'], # Will check description
106
- 'site': 'Phosphorylation sites' # Will check description for phosphorylation
107
  }
108
 
109
  for feature_type, values in annotations.items():
 
110
  for start, end, description in values:
111
- feature_type_lower = feature_type.lower()
112
-
113
  # Get the corresponding column(s)
114
- column = feature_mapping.get(feature_type_lower)
115
  if not column:
116
  continue
117
 
118
  # Handle cases where one feature type maps to multiple possible columns
119
  if isinstance(column, list):
120
- if feature_type_lower == 'region':
121
  if 'Pfam' in description:
122
  column = 'Pfam domain'
123
  elif 'disorder' in description.lower():
124
  column = 'Disorder'
125
  else:
126
  continue
127
- elif feature_type_lower == 'binding site':
128
  if 'metal' in description.lower():
129
  column = 'metal binding sites'
130
  elif 'DNA' in description:
@@ -137,11 +136,14 @@ def create_dataframe(protein_sequence, annotations):
137
  # Fill in the annotation
138
  for i in range(start - 1, end):
139
  if i < len(df):
140
- current_value = df.loc[i, column]
141
- if current_value:
142
- df.loc[i, column] = f"{current_value}; {description}"
143
  else:
144
- df.loc[i, column] = description
 
 
 
 
145
 
146
  return df
147
 
 
93
 
94
  # Map UniProt feature types to our column names
95
  feature_mapping = {
 
96
  'strand': 'Secondary structure',
97
+ 'helix': 'Secondary structure',
98
  'turn': 'Secondary structure',
99
  'domain': 'Domain',
100
+ 'region': ['Pfam domain', 'Disorder'],
101
  'disulfide bond': 'Disulfide bridges',
102
  'glycosylation site': 'Glycosylation sites',
103
  'modified residue': 'modified',
104
  'active site': 'active sites',
105
+ 'binding site': ['metal binding sites', 'DNA binding sites', 'RNA binding sites', 'ligand binding sites'],
106
+ 'site': 'Phosphorylation sites'
107
  }
108
 
109
  for feature_type, values in annotations.items():
110
+ feature_type = feature_type.lower() # Convert to lowercase for matching
111
  for start, end, description in values:
 
 
112
  # Get the corresponding column(s)
113
+ column = feature_mapping.get(feature_type)
114
  if not column:
115
  continue
116
 
117
  # Handle cases where one feature type maps to multiple possible columns
118
  if isinstance(column, list):
119
+ if feature_type == 'region':
120
  if 'Pfam' in description:
121
  column = 'Pfam domain'
122
  elif 'disorder' in description.lower():
123
  column = 'Disorder'
124
  else:
125
  continue
126
+ elif feature_type == 'binding site':
127
  if 'metal' in description.lower():
128
  column = 'metal binding sites'
129
  elif 'DNA' in description:
 
136
  # Fill in the annotation
137
  for i in range(start - 1, end):
138
  if i < len(df):
139
+ if column == 'Secondary structure':
140
+ df.loc[i, column] = feature_type.upper() # Use uppercase for secondary structure
 
141
  else:
142
+ current_value = df.loc[i, column]
143
+ if current_value:
144
+ df.loc[i, column] = f"{current_value}; {description}"
145
+ else:
146
+ df.loc[i, column] = description
147
 
148
  return df
149