Spaces:

Matchball
/

accurate_mass

Running

App Files Community

Matchball committed on Mar 11, 2025

Commit

4c8f4de

verified ·

1 Parent(s): 81d6890

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -40

app.py CHANGED Viewed

@@ -1333,91 +1333,89 @@ def main():
                 analyze_content(text_content)
-import streamlit as st
-import re
 def analyze_content(text_content):
-    st.set_page_config(layout="wide")  # Ensures better layout control
-    # Add explicit CSS to ensure scrollbar is visible
-    st.markdown(
-        """
-        <style>
-            div.block-container {
-                overflow-y: auto;
-            }
-            iframe {
-                overflow: visible !important;
-            }
-        </style>
-        """, unsafe_allow_html=True
-    )
     text_content = remove_specific_lines_from_string(text_content)
-    text_content = re.sub(r'\s+', ' ', text_content).strip()
     text_content = process_replacements(text_content)
     text_content = replace_comma_with_decimal(text_content)
     text_content = adjust_space_around_decimal(text_content)
     text_content = fix_floats(text_content)
     text_content = remove_page_numbers(text_content)
     text_content = re.sub(r'\[((C\d+(?:[A-Z][a-z]?\d*)*),\s*([M+][^]]+))', r'\1 [\3]', text_content)
-    text_content = re.sub(r'(C)(\d+)(h)(\d+)', lambda m: f'C{m.group(2)}H{m.group(4)}', text_content, flags=re.IGNORECASE)
-    text_content = re.sub(r'(c)(\d+)(H)(\d+)', lambda m: f'C{m.group(2)}H{m.group(4)}', text_content, flags=re.IGNORECASE)
     text_content = re.sub(r'\b(C)(\d+)(HD)\b', r'C\2H1D', text_content)
     text_content = re.sub(r'\b(C)\s*(\d*)\s*(H)\s*(\d*)\s*(N)\s*(\d*)\b',
-                          lambda m: f"{m.group(1)}{m.group(2) or ''}{m.group(3)}{m.group(4) or ''}{m.group(5)}{m.group(6) or ''}",
                           text_content)
     text_content = re.sub(r'\b(C)\s*(\d*)\s*(H)\s*(\d*)\s*(O)\s*(\d*)\b',
-                          lambda m: f"{m.group(1)}{m.group(2) or ''}{m.group(3)}{m.group(4) or ''}{m.group(5)}{m.group(6) or ''}",
                           text_content)
     text_content = text_content.replace("C2o", "C20").replace("C1o", "C10").replace("Cal", "cal")
     text_content = re.sub(r'B(\d+)H(\d+)', r'H\2B\1', text_content)
-    text_content = text_content.replace('\n', ' ').replace('+-', '+').replace(':', " ").replace('–', '-').replace(',', " ")
     text_content = remove_spaces_within_brackets(text_content)
     text_content = re.sub(r'\(\[([^]]{1,10})]\+\)', r'[\1]+', text_content)
     text_content = re.sub(r'\[\[([^]]{1,10})]\+]', r'[\1]+', text_content)
     text_content = text_content.replace(' [[', '[').replace(']]', ']')
-    replacements = {"₁": "1", "₂": "2", "₃": "3", "₄": "4", "₅": "5", "₆": "6", "₇": "7", "₈": "8",
-                    "₉": "9", "₀": "0", "¹": "1", "²": "2", "³": "3", "⁴": "4", "⁵": "5", "⁶": "6",
-                    "⁷": "7", "⁸": "8", "⁹": "9", "⁰": "0", "С": "C", "Н": "H", "C ": "C", " H ": "H",
-                    " F ": "F", " N ": "N", " Cl ": "Cl", " Br ": "Br", " O ": "O", " I ": "I", " P ": "P",
-                    " B ": "B", " S ": "S", " NO ": "NO", " Na ": "Na", " SNa ": "SNa", " NNa ": "NNa", " + ": "+"}
     for original, replacement in replacements.items():
         text_content = text_content.replace(original, replacement)
-    text_content = remove_spaces_in_formula(text_content).replace('#', '')
     text_content = re.sub(r'(C\d+)', r' \1', text_content)
     text_content = transform_expressions_in_text(text_content)
     text_content = isotope_correct(text_content)
     text_content = protect_floats(text_content)
-    text_content = text_content.replace("[13C]", "H1HeXe").replace("CF", "C1F").replace("HN", "H1N")
     results1 = search_hrms_with_floats(text_content)
     modified_text = text_content
     for match in results1:
         modified_text = modified_text.replace(match, '')
     modified_text = re.sub(r'\s+', ' ', modified_text).strip()
     text_content = modified_text
     results2 = search_calcd_with_floats(text_content)
     results = results1 + results2
     cleaned_results = hrms_cleanup(results, error_dictionary)
     cleaned_results = calc_dev_calcd_and_recalcd(cleaned_results)
     cleaned_results = remove_sublists_with_missing_element1_positions_swapped(cleaned_results)
     cleaned_results_new = []
     for sublist in cleaned_results:
         if sublist not in cleaned_results_new:
             cleaned_results_new.append(sublist)
     cleaned_results = cleaned_results_new
     num_row = len(cleaned_results)
     if cleaned_results:
@@ -1429,6 +1427,5 @@ def analyze_content(text_content):
         st.write(" ")
         st.write(f"No HRMS matches found in the uploaded file")
 if __name__ == '__main__':
     main()

                 analyze_content(text_content)
 def analyze_content(text_content):
     text_content = remove_specific_lines_from_string(text_content)
+    # st.write(text_content)
+    text_content = re.sub(r'\s+', ' ', text_content).strip()  # Replace multiple spaces with a single space
     text_content = process_replacements(text_content)
     text_content = replace_comma_with_decimal(text_content)
     text_content = adjust_space_around_decimal(text_content)
     text_content = fix_floats(text_content)
+    # st.write(text_content)
     text_content = remove_page_numbers(text_content)
     text_content = re.sub(r'\[((C\d+(?:[A-Z][a-z]?\d*)*),\s*([M+][^]]+))', r'\1 [\3]', text_content)
+    text_content = re.sub(r'(C)(\d+)(h)(\d+)', lambda m: f'C{m.group(2)}H{m.group(4)}', text_content,
+                          flags=re.IGNORECASE)
+    text_content = re.sub(r'(c)(\d+)(H)(\d+)', lambda m: f'C{m.group(2)}H{m.group(4)}', text_content,
+                          flags=re.IGNORECASE)
     text_content = re.sub(r'\b(C)(\d+)(HD)\b', r'C\2H1D', text_content)
     text_content = re.sub(r'\b(C)\s*(\d*)\s*(H)\s*(\d*)\s*(N)\s*(\d*)\b',
+                          lambda
+                              m: f"{m.group(1)}{m.group(2) or ''}{m.group(3)}{m.group(4) or ''}{m.group(5)}{m.group(6) or ''}",
                           text_content)
     text_content = re.sub(r'\b(C)\s*(\d*)\s*(H)\s*(\d*)\s*(O)\s*(\d*)\b',
+                          lambda
+                              m: f"{m.group(1)}{m.group(2) or ''}{m.group(3)}{m.group(4) or ''}{m.group(5)}{m.group(6) or ''}",
                           text_content)
     text_content = text_content.replace("C2o", "C20").replace("C1o", "C10").replace("Cal", "cal")
     text_content = re.sub(r'B(\d+)H(\d+)', r'H\2B\1', text_content)
+    text_content = text_content.replace('\n', ' ').replace('+-', '+').replace(':', " ").replace('–', '-').replace(',',
+                                                                                                                  " ")
     text_content = remove_spaces_within_brackets(text_content)
+    # Unwrap doubly-bracketed adducts, e.g. ([M+H]+) or [[M+H]+] -> [M+H]+.
     text_content = re.sub(r'\(\[([^]]{1,10})]\+\)', r'[\1]+', text_content)
     text_content = re.sub(r'\[\[([^]]{1,10})]\+]', r'[\1]+', text_content)
     text_content = text_content.replace(' [[', '[').replace(']]', ']')
+    replacements = {
+        "₁": "1", "₂": "2", "₃": "3", "₄": "4", "₅": "5",
+        "₆": "6", "₇": "7", "₈": "8", "₉": "9", "₀": "0",
+        "¹": "1", "²": "2", "³": "3", "⁴": "4", "⁵": "5",
+        "⁶": "6", "⁷": "7", "⁸": "8", "⁹": "9", "⁰": "0",
+        "С": "C", "Н": "H",
+        "C ": "C", " H ": "H", " F ": "F", " N ": "N", " Cl ": "Cl",
+        " Br ": "Br", " O ": "O", " I ": "I", " P ": "P", " B ": "B",
+        " S ": "S", " NO ": "NO", " Na ": "Na", " SNa ": "SNa", " NNa ": "NNa",
+        " + ": "+"
+    }
+    # Apply replacements and additional processing steps.
     for original, replacement in replacements.items():
         text_content = text_content.replace(original, replacement)
+    text_content = remove_spaces_in_formula(text_content)
+    text_content = text_content.replace('#', '')
     text_content = re.sub(r'(C\d+)', r' \1', text_content)
     text_content = transform_expressions_in_text(text_content)
     text_content = isotope_correct(text_content)
     text_content = protect_floats(text_content)
+    text_content = text_content.replace("[13C]", "H1HeXe")
+    text_content = text_content.replace("CF", "C1F")
+    text_content = text_content.replace("HN", "H1N")
+    # st.write(text_content)  # Optionally display intermediate output
     results1 = search_hrms_with_floats(text_content)
     modified_text = text_content
     for match in results1:
         modified_text = modified_text.replace(match, '')
+    # Clean up any extra spaces
     modified_text = re.sub(r'\s+', ' ', modified_text).strip()
     text_content = modified_text
     results2 = search_calcd_with_floats(text_content)
     results = results1 + results2
     cleaned_results = hrms_cleanup(results, error_dictionary)
     cleaned_results = calc_dev_calcd_and_recalcd(cleaned_results)
     cleaned_results = remove_sublists_with_missing_element1_positions_swapped(cleaned_results)
+    # Remove duplicate sublists
     cleaned_results_new = []
     for sublist in cleaned_results:
         if sublist not in cleaned_results_new:
             cleaned_results_new.append(sublist)
     cleaned_results = cleaned_results_new
+    # Count the total number of measurements
     num_row = len(cleaned_results)
     if cleaned_results:
         st.write(" ")
         st.write(f"No HRMS matches found in the uploaded file")
 if __name__ == '__main__':
     main()