Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,8 +11,6 @@ import os
|
|
| 11 |
|
| 12 |
# Define source folder and console output colors
|
| 13 |
folder_path = r"C:\Users\match\Downloads"
|
| 14 |
-
CEND = '\033[0m'
|
| 15 |
-
CRED = '\033[91m'
|
| 16 |
|
| 17 |
# ------------------------------------------------
|
| 18 |
# 1) Amino-acid dictionary (neutral, free AA)
|
|
@@ -977,33 +975,6 @@ def clean_file_contents(file_contents):
|
|
| 977 |
|
| 978 |
return ' '.join(re.sub(r'\[\d+\]', '', file_contents).split()).strip()
|
| 979 |
|
| 980 |
-
# Specify the dictionary file name
|
| 981 |
-
file_name = "chemical_data.txt"
|
| 982 |
-
|
| 983 |
-
# Initialize an empty dictionary
|
| 984 |
-
chemical_dict = {}
|
| 985 |
-
|
| 986 |
-
# Load the dictionary from the file
|
| 987 |
-
try:
|
| 988 |
-
with open(file_name, "r", encoding="utf-8") as file: # Explicit UTF-8 encoding
|
| 989 |
-
for line in file:
|
| 990 |
-
# Split each line into key and value
|
| 991 |
-
parts = line.strip().split(":")
|
| 992 |
-
|
| 993 |
-
if len(parts) == 2:
|
| 994 |
-
# Extract key and value
|
| 995 |
-
chemical = parts[0].strip()
|
| 996 |
-
formula = parts[1].strip()
|
| 997 |
-
# Add to the dictionary
|
| 998 |
-
chemical_dict[chemical] = formula
|
| 999 |
-
except FileNotFoundError:
|
| 1000 |
-
print(f"File '{file_name}' not found. Make sure the file exists.")
|
| 1001 |
-
except UnicodeDecodeError:
|
| 1002 |
-
print(f"Encoding issue detected. Try opening '{file_name}' with a different encoding.")
|
| 1003 |
-
|
| 1004 |
-
# Announce that the dictionary load step has run (the dictionary contents themselves are not printed)
|
| 1005 |
-
print("Loaded Dictionary: chemical_dict.txt")
|
| 1006 |
-
|
| 1007 |
|
| 1008 |
def check_reagents(pdf_file_path):
|
| 1009 |
"""
|
|
@@ -1022,14 +993,11 @@ def check_reagents(pdf_file_path):
|
|
| 1022 |
file_contents = clean_file_contents(file_contents)
|
| 1023 |
|
| 1024 |
# Search for the pattern "x mg Y (z mmol)", where Y is a single token of letters, digits, or hyphens and x and z are floats or integers, and rewrite it as "Y (x mg, z mmol)"
|
| 1025 |
-
|
| 1026 |
pattern = r"(\d+\.?\d*) mg ([a-zA-Z0-9-]+) \((\d+\.?\d*) mmol\)"
|
| 1027 |
|
| 1028 |
-
# Function to transform the string
|
| 1029 |
def transform_string(s):
|
| 1030 |
return re.sub(pattern, r"\2 (\1 mg, \3 mmol)", s)
|
| 1031 |
|
| 1032 |
-
# Transform file_contents string
|
| 1033 |
file_contents = transform_string(file_contents)
|
| 1034 |
result = extract_values_from_text(file_contents)
|
| 1035 |
x, y, z = extract_info_from_list(result)
|
|
@@ -1037,13 +1005,12 @@ def check_reagents(pdf_file_path):
|
|
| 1037 |
if x:
|
| 1038 |
for i in range(len(x)):
|
| 1039 |
if x[i]:
|
| 1040 |
-
|
| 1041 |
if z[i] != 0:
|
| 1042 |
apparent_mw = y[i] / z[i]
|
| 1043 |
else:
|
| 1044 |
apparent_mw = 1000.66
|
| 1045 |
mw = None
|
| 1046 |
-
words = x[i].split()
|
| 1047 |
if words and len(words[0]) > 4:
|
| 1048 |
formula_from_name = name_to_sum_formula(x[i])
|
| 1049 |
else:
|
|
@@ -1061,10 +1028,7 @@ def check_reagents(pdf_file_path):
|
|
| 1061 |
if mw:
|
| 1062 |
mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
|
| 1063 |
error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name else '')}"
|
| 1064 |
-
|
| 1065 |
-
reagent_errors.append(f"{CRED}{error_msg}{CEND}")
|
| 1066 |
-
else:
|
| 1067 |
-
reagent_errors.append(error_msg)
|
| 1068 |
|
| 1069 |
if not mw:
|
| 1070 |
x[i] = remove_first_word(x[i])
|
|
@@ -1089,10 +1053,7 @@ def check_reagents(pdf_file_path):
|
|
| 1089 |
if mw:
|
| 1090 |
mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
|
| 1091 |
error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%' + (' *' if formula_from_name else '')})"
|
| 1092 |
-
|
| 1093 |
-
reagent_errors.append(f"{CRED}{error_msg}{CEND}")
|
| 1094 |
-
else:
|
| 1095 |
-
reagent_errors.append(error_msg)
|
| 1096 |
|
| 1097 |
if not mw:
|
| 1098 |
x[i] = remove_first_word(x[i])
|
|
@@ -1121,10 +1082,7 @@ def check_reagents(pdf_file_path):
|
|
| 1121 |
if mw and mw != 666.66:
|
| 1122 |
mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
|
| 1123 |
error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name else '')}"
|
| 1124 |
-
|
| 1125 |
-
reagent_errors.append(f"{CRED}{error_msg}{CEND}")
|
| 1126 |
-
else:
|
| 1127 |
-
reagent_errors.append(error_msg)
|
| 1128 |
|
| 1129 |
if not mw:
|
| 1130 |
x[i] = remove_first_word(x[i])
|
|
@@ -1165,122 +1123,45 @@ def check_reagents(pdf_file_path):
|
|
| 1165 |
if mw and mw != 666.66:
|
| 1166 |
mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
|
| 1167 |
error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name or replacement_formula else '')}"
|
| 1168 |
-
|
| 1169 |
-
reagent_errors.append(f"{CRED}{error_msg}{CEND}")
|
| 1170 |
-
else:
|
| 1171 |
-
reagent_errors.append(error_msg)
|
| 1172 |
|
| 1173 |
except Exception as e:
|
| 1174 |
-
# Handle the error, or simply return error info
|
| 1175 |
reagent_errors.append(f"an error occurred: {e}")
|
| 1176 |
|
| 1177 |
return reagent_errors
|
| 1178 |
|
| 1179 |
-
def clean_ansi_codes(text):
|
| 1180 |
-
"""
|
| 1181 |
-
Remove ANSI escape codes from text and return clean text.
|
| 1182 |
-
"""
|
| 1183 |
-
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
| 1184 |
-
return ansi_escape.sub('', text)
|
| 1185 |
|
| 1186 |
-
def format_reagent_error(error_text):
|
| 1187 |
-
"""
|
| 1188 |
-
Format reagent error text for Streamlit display.
|
| 1189 |
-
Converts ANSI color codes to appropriate Streamlit styling.
|
| 1190 |
-
"""
|
| 1191 |
-
# Clean the text of ANSI codes
|
| 1192 |
-
clean_text = clean_ansi_codes(error_text)
|
| 1193 |
-
|
| 1194 |
-
# Check if the original text had red color codes (indicating an error/warning)
|
| 1195 |
-
has_red_color = '\033[91m' in error_text or '[91m' in error_text
|
| 1196 |
-
has_asterisk = '*' in error_text
|
| 1197 |
-
|
| 1198 |
-
return clean_text, has_red_color, has_asterisk
|
| 1199 |
|
| 1200 |
-
def check_reagents(pdf_file_path):
|
| 1201 |
-
"""
|
| 1202 |
-
Replace this entire function with your actual implementation.
|
| 1203 |
-
"""
|
| 1204 |
-
# This is just a placeholder - replace with your real function
|
| 1205 |
-
return [] # Returns empty list so no fake errors show
|
| 1206 |
|
| 1207 |
-
def clean_ansi_codes(text):
|
| 1208 |
-
"""
|
| 1209 |
-
Remove ANSI escape codes from text and return clean text.
|
| 1210 |
-
"""
|
| 1211 |
-
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
| 1212 |
-
return ansi_escape.sub('', text)
|
| 1213 |
|
| 1214 |
-
|
| 1215 |
-
|
| 1216 |
-
Format reagent error text for Streamlit display.
|
| 1217 |
-
Converts ANSI color codes and determines if mass error >= 10%.
|
| 1218 |
-
"""
|
| 1219 |
-
# Clean the text of ANSI codes
|
| 1220 |
-
clean_text = clean_ansi_codes(error_text)
|
| 1221 |
-
|
| 1222 |
-
# Extract mass error percentage using regex
|
| 1223 |
-
mass_error_pattern = r'Mass error:\s*(\d+\.?\d*)%'
|
| 1224 |
-
match = re.search(mass_error_pattern, clean_text)
|
| 1225 |
-
|
| 1226 |
-
is_high_error = False
|
| 1227 |
-
if match:
|
| 1228 |
-
mass_error_percent = float(match.group(1))
|
| 1229 |
-
is_high_error = mass_error_percent >= 10.0
|
| 1230 |
-
|
| 1231 |
-
return clean_text, is_high_error
|
| 1232 |
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
Remove ANSI escape codes from text and return clean text.
|
| 1236 |
-
"""
|
| 1237 |
-
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
| 1238 |
-
return ansi_escape.sub('', text)
|
| 1239 |
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
-
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
|
|
|
|
|
|
| 1258 |
|
| 1259 |
-
def clean_ansi_codes(text):
|
| 1260 |
-
"""
|
| 1261 |
-
Remove ANSI escape codes from text and return clean text.
|
| 1262 |
-
"""
|
| 1263 |
-
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
| 1264 |
-
return ansi_escape.sub('', text)
|
| 1265 |
|
| 1266 |
-
def format_reagent_error(error_text):
|
| 1267 |
-
"""
|
| 1268 |
-
Format reagent error text for Streamlit display.
|
| 1269 |
-
Converts ANSI color codes and determines if mass error >= 10%.
|
| 1270 |
-
"""
|
| 1271 |
-
# Clean the text of ANSI codes
|
| 1272 |
-
clean_text = clean_ansi_codes(error_text)
|
| 1273 |
-
|
| 1274 |
-
# Extract mass error percentage using regex
|
| 1275 |
-
mass_error_pattern = r'Mass error:\s*(\d+\.?\d*)%'
|
| 1276 |
-
match = re.search(mass_error_pattern, clean_text)
|
| 1277 |
-
|
| 1278 |
-
is_high_error = False
|
| 1279 |
-
if match:
|
| 1280 |
-
mass_error_percent = float(match.group(1))
|
| 1281 |
-
is_high_error = mass_error_percent >= 10.0
|
| 1282 |
-
|
| 1283 |
-
return clean_text, is_high_error
|
| 1284 |
|
| 1285 |
def main():
|
| 1286 |
st.set_page_config(
|
|
|
|
| 11 |
|
| 12 |
# Define source folder and console output colors
|
| 13 |
folder_path = r"C:\Users\match\Downloads"
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# ------------------------------------------------
|
| 16 |
# 1) Amino-acid dictionary (neutral, free AA)
|
|
|
|
| 975 |
|
| 976 |
return ' '.join(re.sub(r'\[\d+\]', '', file_contents).split()).strip()
|
| 977 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 978 |
|
| 979 |
def check_reagents(pdf_file_path):
|
| 980 |
"""
|
|
|
|
| 993 |
file_contents = clean_file_contents(file_contents)
|
| 994 |
|
| 995 |
# Search for the pattern "x mg Y (z mmol)", where Y is a single token of letters, digits, or hyphens and x and z are floats or integers, and rewrite it as "Y (x mg, z mmol)"
|
|
|
|
| 996 |
pattern = r"(\d+\.?\d*) mg ([a-zA-Z0-9-]+) \((\d+\.?\d*) mmol\)"
|
| 997 |
|
|
|
|
| 998 |
def transform_string(s):
|
| 999 |
return re.sub(pattern, r"\2 (\1 mg, \3 mmol)", s)
|
| 1000 |
|
|
|
|
| 1001 |
file_contents = transform_string(file_contents)
|
| 1002 |
result = extract_values_from_text(file_contents)
|
| 1003 |
x, y, z = extract_info_from_list(result)
|
|
|
|
| 1005 |
if x:
|
| 1006 |
for i in range(len(x)):
|
| 1007 |
if x[i]:
|
|
|
|
| 1008 |
if z[i] != 0:
|
| 1009 |
apparent_mw = y[i] / z[i]
|
| 1010 |
else:
|
| 1011 |
apparent_mw = 1000.66
|
| 1012 |
mw = None
|
| 1013 |
+
words = x[i].split()
|
| 1014 |
if words and len(words[0]) > 4:
|
| 1015 |
formula_from_name = name_to_sum_formula(x[i])
|
| 1016 |
else:
|
|
|
|
| 1028 |
if mw:
|
| 1029 |
mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
|
| 1030 |
error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name else '')}"
|
| 1031 |
+
reagent_errors.append(error_msg)
|
|
|
|
|
|
|
|
|
|
| 1032 |
|
| 1033 |
if not mw:
|
| 1034 |
x[i] = remove_first_word(x[i])
|
|
|
|
| 1053 |
if mw:
|
| 1054 |
mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
|
| 1055 |
error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%' + (' *' if formula_from_name else '')})"
|
| 1056 |
+
reagent_errors.append(error_msg)
|
|
|
|
|
|
|
|
|
|
| 1057 |
|
| 1058 |
if not mw:
|
| 1059 |
x[i] = remove_first_word(x[i])
|
|
|
|
| 1082 |
if mw and mw != 666.66:
|
| 1083 |
mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
|
| 1084 |
error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name else '')}"
|
| 1085 |
+
reagent_errors.append(error_msg)
|
|
|
|
|
|
|
|
|
|
| 1086 |
|
| 1087 |
if not mw:
|
| 1088 |
x[i] = remove_first_word(x[i])
|
|
|
|
| 1123 |
if mw and mw != 666.66:
|
| 1124 |
mass_error = abs(round(((mw / apparent_mw) - 1) * 100, 1))
|
| 1125 |
error_msg = f"{x[i]} ({y[i]} mg, {z[i]} mmol) MW: {mw}, used: {apparent_mw:.2f} (Mass error: {mass_error}{'%)' + (' *' if formula_from_name or replacement_formula else '')}"
|
| 1126 |
+
reagent_errors.append(error_msg)
|
|
|
|
|
|
|
|
|
|
| 1127 |
|
| 1128 |
except Exception as e:
|
|
|
|
| 1129 |
reagent_errors.append(f"an error occurred: {e}")
|
| 1130 |
|
| 1131 |
return reagent_errors
|
| 1132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1136 |
|
| 1137 |
+
# Specify the dictionary file name
|
| 1138 |
+
file_name = "chemical_data.txt"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1139 |
|
| 1140 |
+
# Initialize an empty dictionary
|
| 1141 |
+
chemical_dict = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1142 |
|
| 1143 |
+
# Load the dictionary from the file
|
| 1144 |
+
try:
|
| 1145 |
+
with open(file_name, "r", encoding="utf-8") as file: # Explicit UTF-8 encoding
|
| 1146 |
+
for line in file:
|
| 1147 |
+
# Split each line into key and value
|
| 1148 |
+
parts = line.strip().split(":")
|
| 1149 |
+
|
| 1150 |
+
if len(parts) == 2:
|
| 1151 |
+
# Extract key and value
|
| 1152 |
+
chemical = parts[0].strip()
|
| 1153 |
+
formula = parts[1].strip()
|
| 1154 |
+
# Add to the dictionary
|
| 1155 |
+
chemical_dict[chemical] = formula
|
| 1156 |
+
except FileNotFoundError:
|
| 1157 |
+
print(f"File '{file_name}' not found. Make sure the file exists.")
|
| 1158 |
+
except UnicodeDecodeError:
|
| 1159 |
+
print(f"Encoding issue detected. Try opening '{file_name}' with a different encoding.")
|
| 1160 |
+
|
| 1161 |
+
# Announce that the dictionary load step has run (the dictionary contents themselves are not printed)
|
| 1162 |
+
print("Loaded Dictionary: chemical_dict.txt")
|
| 1163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1165 |
|
| 1166 |
def main():
|
| 1167 |
st.set_page_config(
|