Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -436,19 +436,19 @@ def isotope_correct(text):
|
|
| 436 |
# Dictionary of replacements for isotope corrections and other text cleanup
|
| 437 |
replacements = {
|
| 438 |
"For":" ","[MALDI]":"","[MALDI-TOF]":"","detected":" ","page": " ", "of": " ", "𝑀": " ", "EI": " ", " . ": " ", ":": " ", "Δ": " ",
|
| 439 |
-
"𝛼": " ", " a ": " ", "M ": " ", " H ": " ", "ESI": " ", " Na ": " ", " K ": " ",
|
| 440 |
" NH4 ": " ", "Obs.": " ", "obs": " ", "78.9183": "", "48Ti": "[48Ti]","54Fe":"[54Fe]",
|
| 441 |
"46Ti": "[46Ti]", "47Ti": "[47Ti]", " 2H": "D", " [3H]": "[3H]",
|
| 442 |
" 10B": "[10B]", "127I": "[127I]", "120Sn":"[120Sn]", "119Sn":"[119Sn]", "118Sn":"[118Sn]",
|
| 443 |
"N23Na": "*N23*Na","O23Na": "*O23*Na", "F23Na": "*F23*Na", "H23Na": "*H23*Na", "23Na":"[23Na]","H28Si": "*H28*Si", "H11B": "*H11*B",
|
| 444 |
"H13Co": "*H13*Co", "H13Cl": "*H13*Cl", "H18O": "*H18*O", "H218O": "*H218*O", "N18O": "*N18*O",
|
| 445 |
-
"H35Cl": "*H35*Cl", "H37Cl": "*H37*Cl", "H10B":"*H10*B", "H19F": "*H19*F", "H81Br":"*H81*Br","H79Br":"*H79*Br","Br79": "[79Br]",
|
| 446 |
" 79Br": "[79Br]", " 81Br": "[81Br]", "18O": "[18O]", "74Ge": "[74Ge]", "65Cu":"[65Cu]",
|
| 447 |
"63Cu":"[63Cu]", "Br81": "[81Br]", " 35Cl": "[35Cl]", " 37Cl": "[37Cl]", " 11B": "[11B]",
|
| 448 |
" 32S": "S", " 31P": "P", "35Cl":"[35Cl]", "80Se":"[80Se]", "37Cl":"[37Cl]", "28Si":"[28Si]",
|
| 449 |
"13C":"[13C]", "[13C]l":"13Cl", "96Ru":"[96Ru]","79Br":"[79Br]", "81Br":"[81Br]", "11B":"[11B]", "10B":"[10B]",
|
| 450 |
"[10B]r":"10Br", "[[":"[", "]]":"]", "*H13*Cl": "H13Cl", "*H18*O": "H18O", "*H218*O": "H218O",
|
| 451 |
-
"*N18*O": "N18O", "*H13*Co": "H13Co", "*H37*Cl": "H37Cl", "*H35*Cl": "H35Cl","*
|
| 452 |
"*H28*Si": "H28Si", "*H10*B":"H10B", "*H23*Na": "H23Na", "*F23*Na": "F23Na", "*N23*Na": "N23Na","*O23*Na": "O23Na",
|
| 453 |
"*H11*B":"H11B", "*H19*F": "H19F", "cacld": "", "calcd.": "calcd ", "calc’d": "calcd ",
|
| 454 |
"calcd gcm": " ", " is ": " ", "calcd": "calcd ", "calcd ": "calcd ","++": "+","(M":"[M", ")+":"]+ ",
|
|
@@ -1144,7 +1144,7 @@ def search_calcd_with_floats(text: str) -> List[str]:
|
|
| 1144 |
calcd_start = calcd_match.start()
|
| 1145 |
|
| 1146 |
# Look at up to 25 characters before 'calcd'
|
| 1147 |
-
pre_calcd_start = max(0, calcd_start -
|
| 1148 |
pre_calcd_text = text[pre_calcd_start:calcd_start]
|
| 1149 |
|
| 1150 |
# Check if there's a float in the pre-calcd text
|
|
|
|
| 436 |
# Dictionary of replacements for isotope corrections and other text cleanup
|
| 437 |
replacements = {
|
| 438 |
"For":" ","[MALDI]":"","[MALDI-TOF]":"","detected":" ","page": " ", "of": " ", "𝑀": " ", "EI": " ", " . ": " ", ":": " ", "Δ": " ",
|
| 439 |
+
"𝛼": " ", " a ": " ", "M ": " ", " H ": " ","(ESI)":" ", "ESI": " ", " Na ": " ", " K ": " ",
|
| 440 |
" NH4 ": " ", "Obs.": " ", "obs": " ", "78.9183": "", "48Ti": "[48Ti]","54Fe":"[54Fe]",
|
| 441 |
"46Ti": "[46Ti]", "47Ti": "[47Ti]", " 2H": "D", " [3H]": "[3H]",
|
| 442 |
" 10B": "[10B]", "127I": "[127I]", "120Sn":"[120Sn]", "119Sn":"[119Sn]", "118Sn":"[118Sn]",
|
| 443 |
"N23Na": "*N23*Na","O23Na": "*O23*Na", "F23Na": "*F23*Na", "H23Na": "*H23*Na", "23Na":"[23Na]","H28Si": "*H28*Si", "H11B": "*H11*B",
|
| 444 |
"H13Co": "*H13*Co", "H13Cl": "*H13*Cl", "H18O": "*H18*O", "H218O": "*H218*O", "N18O": "*N18*O",
|
| 445 |
+
"H35Cl": "*H35*Cl", "H37Cl": "*H37*Cl", "H10B":"*H10*B", "H19F": "*H19*F", "H81Br":"*H81*Br","-2H79Br":"-2H[79Br]","-H79Br":"-H[79Br]","H79Br":"*H79*Br","Br79": "[79Br]",
|
| 446 |
" 79Br": "[79Br]", " 81Br": "[81Br]", "18O": "[18O]", "74Ge": "[74Ge]", "65Cu":"[65Cu]",
|
| 447 |
"63Cu":"[63Cu]", "Br81": "[81Br]", " 35Cl": "[35Cl]", " 37Cl": "[37Cl]", " 11B": "[11B]",
|
| 448 |
" 32S": "S", " 31P": "P", "35Cl":"[35Cl]", "80Se":"[80Se]", "37Cl":"[37Cl]", "28Si":"[28Si]",
|
| 449 |
"13C":"[13C]", "[13C]l":"13Cl", "96Ru":"[96Ru]","79Br":"[79Br]", "81Br":"[81Br]", "11B":"[11B]", "10B":"[10B]",
|
| 450 |
"[10B]r":"10Br", "[[":"[", "]]":"]", "*H13*Cl": "H13Cl", "*H18*O": "H18O", "*H218*O": "H218O",
|
| 451 |
+
"*N18*O": "N18O", "*H13*Co": "H13Co", "*H37*Cl": "H37Cl", "*H35*Cl": "H35Cl","*H81*Br":"H81Br","*H79*Br":"H79Br",
|
| 452 |
"*H28*Si": "H28Si", "*H10*B":"H10B", "*H23*Na": "H23Na", "*F23*Na": "F23Na", "*N23*Na": "N23Na","*O23*Na": "O23Na",
|
| 453 |
"*H11*B":"H11B", "*H19*F": "H19F", "cacld": "", "calcd.": "calcd ", "calc’d": "calcd ",
|
| 454 |
"calcd gcm": " ", " is ": " ", "calcd": "calcd ", "calcd ": "calcd ","++": "+","(M":"[M", ")+":"]+ ",
|
|
|
|
| 1144 |
calcd_start = calcd_match.start()
|
| 1145 |
|
| 1146 |
# Look at up to 35 characters before 'calcd'
|
| 1147 |
+
pre_calcd_start = max(0, calcd_start - 35)
|
| 1148 |
pre_calcd_text = text[pre_calcd_start:calcd_start]
|
| 1149 |
|
| 1150 |
# Check if there's a float in the pre-calcd text
|