Matchball committed on
Commit
ba04ff7
·
verified ·
1 Parent(s): 134a068

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -148
app.py CHANGED
@@ -11,8 +11,6 @@ import os
11
 
12
  # Define source folder and console output colors
13
  folder_path = r"C:\Users\match\Downloads"
14
- CEND = '\033[0m'
15
- CRED = '\033[91m'
16
 
17
  # ------------------------------------------------
18
  # 1) Amino-acid dictionary (neutral, free AA)
@@ -977,33 +975,6 @@ def clean_file_contents(file_contents):
977
 
978
  return ' '.join(re.sub(r'\[\d+\]', '', file_contents).split()).strip()
979
 
980
- # Specify the dictionary file name
981
- file_name = "chemical_data.txt"
982
-
983
- # Initialize an empty dictionary
984
- chemical_dict = {}
985
-
986
- # Load the dictionary from the file
987
- try:
988
- with open(file_name, "r", encoding="utf-8") as file: # Explicit UTF-8 encoding
989
- for line in file:
990
- # Split each line into key and value
991
- parts = line.strip().split(":")
992
-
993
- if len(parts) == 2:
994
- # Extract key and value
995
- chemical = parts[0].strip()
996
- formula = parts[1].strip()
997
- # Add to the dictionary
998
- chemical_dict[chemical] = formula
999
- except FileNotFoundError:
1000
- print(f"File '{file_name}' not found. Make sure the file exists.")
1001
- except UnicodeDecodeError:
1002
- print(f"Encoding issue detected. Try opening '{file_name}' with a different encoding.")
1003
-
1004
- # Print the loaded dictionary
1005
- print("Loaded Dictionary: chemical_dict.txt")
1006
-
1007
 
1008
  def check_reagents(pdf_file_path):
1009
  """
@@ -1022,14 +993,11 @@ def check_reagents(pdf_file_path):
1022
  file_contents = clean_file_contents(file_contents)
1023
 
1024
  # Search for the pattern "x mg Y (z mmol)", where Y is a string and x and z are floats or integers, and transform it into "Y (x mg, z mmol)"
1025
-
1026
  pattern = r"(\d+\.?\d*) mg ([a-zA-Z0-9-]+) \((\d+\.?\d*) mmol\)"
1027
 
1028
- # Function to transform the string
1029
  def transform_string(s):
1030
  return re.sub(pattern, r"\2 (\1 mg, \3 mmol)", s)
1031
 
1032
- # Transform file_contents string
1033
  file_contents = transform_string(file_contents)
1034
  result = extract_values_from_text(file_contents)
1035
  x, y, z = extract_info_from_list(result)
@@ -1037,13 +1005,12 @@ def check_reagents(pdf_file_path):
1037
  if x:
1038
  for i in range(len(x)):
1039
  if x[i]:
1040
-
1041
  if z[i] != 0:
1042
  apparent_mw = y[i] / z[i]
1043
  else:
1044
  apparent_mw = 1000.66
1045
  mw = None
1046
- words = x[i].split() # Line 1: Split the string into a list of words
1047
  if words and len(words[0]) > 4:
1048
  formula_from_name = name_to_sum_formula(x[i])
1049
  else:
@@ -1061,10 +1028,7 @@ def check_reagents(pdf_file_path):
1061
  if mw:
1062
  mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
1063
  error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name else '')}"
1064
- if mass_error > 10:
1065
- reagent_errors.append(f"{CRED}{error_msg}{CEND}")
1066
- else:
1067
- reagent_errors.append(error_msg)
1068
 
1069
  if not mw:
1070
  x[i] = remove_first_word(x[i])
@@ -1089,10 +1053,7 @@ def check_reagents(pdf_file_path):
1089
  if mw:
1090
  mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
1091
  error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%' + (' *' if formula_from_name else '')})"
1092
- if mass_error > 10:
1093
- reagent_errors.append(f"{CRED}{error_msg}{CEND}")
1094
- else:
1095
- reagent_errors.append(error_msg)
1096
 
1097
  if not mw:
1098
  x[i] = remove_first_word(x[i])
@@ -1121,10 +1082,7 @@ def check_reagents(pdf_file_path):
1121
  if mw and mw != 666.66:
1122
  mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
1123
  error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name else '')}"
1124
- if mass_error > 10:
1125
- reagent_errors.append(f"{CRED}{error_msg}{CEND}")
1126
- else:
1127
- reagent_errors.append(error_msg)
1128
 
1129
  if not mw:
1130
  x[i] = remove_first_word(x[i])
@@ -1165,122 +1123,45 @@ def check_reagents(pdf_file_path):
1165
  if mw and mw != 666.66:
1166
  mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
1167
  error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name or replacement_formula else '')}"
1168
- if mass_error > 10:
1169
- reagent_errors.append(f"{CRED}{error_msg}{CEND}")
1170
- else:
1171
- reagent_errors.append(error_msg)
1172
 
1173
  except Exception as e:
1174
- # Handle the error, or simply return error info
1175
  reagent_errors.append(f"an error occurred: {e}")
1176
 
1177
  return reagent_errors
1178
 
1179
- def clean_ansi_codes(text):
1180
- """
1181
- Remove ANSI escape codes from text and return clean text.
1182
- """
1183
- ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
1184
- return ansi_escape.sub('', text)
1185
 
1186
- def format_reagent_error(error_text):
1187
- """
1188
- Format reagent error text for Streamlit display.
1189
- Converts ANSI color codes to appropriate Streamlit styling.
1190
- """
1191
- # Clean the text of ANSI codes
1192
- clean_text = clean_ansi_codes(error_text)
1193
-
1194
- # Check if the original text had red color codes (indicating an error/warning)
1195
- has_red_color = '\033[91m' in error_text or '[91m' in error_text
1196
- has_asterisk = '*' in error_text
1197
-
1198
- return clean_text, has_red_color, has_asterisk
1199
 
1200
- def check_reagents(pdf_file_path):
1201
- """
1202
- Replace this entire function with your actual implementation.
1203
- """
1204
- # This is just a placeholder - replace with your real function
1205
- return [] # Returns empty list so no fake errors show
1206
 
1207
- def clean_ansi_codes(text):
1208
- """
1209
- Remove ANSI escape codes from text and return clean text.
1210
- """
1211
- ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
1212
- return ansi_escape.sub('', text)
1213
 
1214
- def format_reagent_error(error_text):
1215
- """
1216
- Format reagent error text for Streamlit display.
1217
- Converts ANSI color codes and determines if mass error >= 10%.
1218
- """
1219
- # Clean the text of ANSI codes
1220
- clean_text = clean_ansi_codes(error_text)
1221
-
1222
- # Extract mass error percentage using regex
1223
- mass_error_pattern = r'Mass error:\s*(\d+\.?\d*)%'
1224
- match = re.search(mass_error_pattern, clean_text)
1225
-
1226
- is_high_error = False
1227
- if match:
1228
- mass_error_percent = float(match.group(1))
1229
- is_high_error = mass_error_percent >= 10.0
1230
-
1231
- return clean_text, is_high_error
1232
 
1233
- def clean_ansi_codes(text):
1234
- """
1235
- Remove ANSI escape codes from text and return clean text.
1236
- """
1237
- ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
1238
- return ansi_escape.sub('', text)
1239
 
1240
- def format_reagent_error(error_text):
1241
- """
1242
- Format reagent error text for Streamlit display.
1243
- Converts ANSI color codes and determines if mass error >= 10%.
1244
- """
1245
- # Clean the text of ANSI codes
1246
- clean_text = clean_ansi_codes(error_text)
1247
-
1248
- # Extract mass error percentage using regex
1249
- mass_error_pattern = r'Mass error:\s*(\d+\.?\d*)%'
1250
- match = re.search(mass_error_pattern, clean_text)
1251
-
1252
- is_high_error = False
1253
- if match:
1254
- mass_error_percent = float(match.group(1))
1255
- is_high_error = mass_error_percent >= 10.0
1256
-
1257
- return clean_text, is_high_error
 
 
1258
 
1259
- def clean_ansi_codes(text):
1260
- """
1261
- Remove ANSI escape codes from text and return clean text.
1262
- """
1263
- ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
1264
- return ansi_escape.sub('', text)
1265
 
1266
- def format_reagent_error(error_text):
1267
- """
1268
- Format reagent error text for Streamlit display.
1269
- Converts ANSI color codes and determines if mass error >= 10%.
1270
- """
1271
- # Clean the text of ANSI codes
1272
- clean_text = clean_ansi_codes(error_text)
1273
-
1274
- # Extract mass error percentage using regex
1275
- mass_error_pattern = r'Mass error:\s*(\d+\.?\d*)%'
1276
- match = re.search(mass_error_pattern, clean_text)
1277
-
1278
- is_high_error = False
1279
- if match:
1280
- mass_error_percent = float(match.group(1))
1281
- is_high_error = mass_error_percent >= 10.0
1282
-
1283
- return clean_text, is_high_error
1284
 
1285
  def main():
1286
  st.set_page_config(
 
11
 
12
  # Define source folder and console output colors
13
  folder_path = r"C:\Users\match\Downloads"
 
 
14
 
15
  # ------------------------------------------------
16
  # 1) Amino-acid dictionary (neutral, free AA)
 
975
 
976
  return ' '.join(re.sub(r'\[\d+\]', '', file_contents).split()).strip()
977
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
978
 
979
  def check_reagents(pdf_file_path):
980
  """
 
993
  file_contents = clean_file_contents(file_contents)
994
 
995
  # Search for the pattern "x mg Y (z mmol)", where Y is a string and x and z are floats or integers, and transform it into "Y (x mg, z mmol)"
 
996
  pattern = r"(\d+\.?\d*) mg ([a-zA-Z0-9-]+) \((\d+\.?\d*) mmol\)"
997
 
 
998
  def transform_string(s):
999
  return re.sub(pattern, r"\2 (\1 mg, \3 mmol)", s)
1000
 
 
1001
  file_contents = transform_string(file_contents)
1002
  result = extract_values_from_text(file_contents)
1003
  x, y, z = extract_info_from_list(result)
 
1005
  if x:
1006
  for i in range(len(x)):
1007
  if x[i]:
 
1008
  if z[i] != 0:
1009
  apparent_mw = y[i] / z[i]
1010
  else:
1011
  apparent_mw = 1000.66
1012
  mw = None
1013
+ words = x[i].split()
1014
  if words and len(words[0]) > 4:
1015
  formula_from_name = name_to_sum_formula(x[i])
1016
  else:
 
1028
  if mw:
1029
  mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
1030
  error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name else '')}"
1031
+ reagent_errors.append(error_msg)
 
 
 
1032
 
1033
  if not mw:
1034
  x[i] = remove_first_word(x[i])
 
1053
  if mw:
1054
  mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
1055
  error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%' + (' *' if formula_from_name else '')})"
1056
+ reagent_errors.append(error_msg)
 
 
 
1057
 
1058
  if not mw:
1059
  x[i] = remove_first_word(x[i])
 
1082
  if mw and mw != 666.66:
1083
  mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
1084
  error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name else '')}"
1085
+ reagent_errors.append(error_msg)
 
 
 
1086
 
1087
  if not mw:
1088
  x[i] = remove_first_word(x[i])
 
1123
  if mw and mw != 666.66:
1124
  mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
1125
  error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name or replacement_formula else '')}"
1126
+ reagent_errors.append(error_msg)
 
 
 
1127
 
1128
  except Exception as e:
 
1129
  reagent_errors.append(f"an error occurred: {e}")
1130
 
1131
  return reagent_errors
1132
 
 
 
 
 
 
 
1133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1134
 
 
 
 
 
 
 
1135
 
 
 
 
 
 
 
1136
 
1137
+ # Specify the dictionary file name
1138
+ file_name = "chemical_data.txt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1139
 
1140
+ # Initialize an empty dictionary
1141
+ chemical_dict = {}
 
 
 
 
1142
 
1143
+ # Load the dictionary from the file
1144
+ try:
1145
+ with open(file_name, "r", encoding="utf-8") as file: # Explicit UTF-8 encoding
1146
+ for line in file:
1147
+ # Split each line into key and value
1148
+ parts = line.strip().split(":")
1149
+
1150
+ if len(parts) == 2:
1151
+ # Extract key and value
1152
+ chemical = parts[0].strip()
1153
+ formula = parts[1].strip()
1154
+ # Add to the dictionary
1155
+ chemical_dict[chemical] = formula
1156
+ except FileNotFoundError:
1157
+ print(f"File '{file_name}' not found. Make sure the file exists.")
1158
+ except UnicodeDecodeError:
1159
+ print(f"Encoding issue detected. Try opening '{file_name}' with a different encoding.")
1160
+
1161
+ # Print the loaded dictionary
1162
+ print("Loaded Dictionary: chemical_dict.txt")
1163
 
 
 
 
 
 
 
1164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
 
1166
  def main():
1167
  st.set_page_config(