Spaces:

Matchball
/

SI_Check

Running

App Files Files Community

SI_Check / app.py

Matchball

Update app.py

f0e3bb2 verified 5 months ago

raw

history blame contribute delete

24.2 kB

	import streamlit as st
	import re
	import time
	import logging
	import fitz # PyMuPDF
	import io

	# Root-logger setup: emit INFO and above with a timestamp, level, and message.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)


	# (US fragment, UK fragment) regex pairs. Each fragment is wrapped in word
	# boundaries and matched case-insensitively. Alternations use a plain "|";
	# an escaped "\|" would match a literal pipe character instead.
	_SPELLING_PAIRS = (
	    ('analyze(?:d|ing)?', 'analyse(?:d|ing)?'),
	    ('(?:un)?catalyze(?:d|s|ing)?', '(?:un)?catalyse(?:d|s|ing)?'),
	    ('sulfur', 'sulphur'),
	    ('aluminum', 'aluminium'),
	    ('color(?:ed|ing|s|less)?', 'colour(?:ed|ing|s|less)?'),
	    ('flavor(?:ed|ing|s)?', 'flavour(?:ed|ing|s)?'),
	    ('liter', 'litre'),
	    ('fiber', 'fibre'),
	    ('meter', 'metre'),
	    ('neighbor(?:ed|ing|s)?', 'neighbour(?:ed|ing|s)?'),
	    ('(?:re)?organiz(?:e|ed|ing|es|ation)', '(?:re)?organis(?:e|ed|ing|es|ation)'),
	    ('vapor', 'vapour'),
	    ('behavior', 'behaviour'),
	    ('realiz(?:e|ed|ing|es|ation)', 'realis(?:e|ed|ing|es|ation)'),
	    ('synthetize(?:d|s)?', 'synthetise(?:d|s)?'),
	    ('characteriz(?:e|ed|ing|es|ation)', 'characteris(?:e|ed|ing|es|ation)'),
	    ('(?:re)?crystalliz(?:e|ed|ing|es|ation)', '(?:re)?crystallis(?:e|ed|ing|es|ation)'),
	    ('polymeriz(?:e|ed|ing|es|ation)', 'polymeris(?:e|ed|ing|es|ation)'),
	    ('oxidized', 'oxidised'),
	    ('neutraliz(?:e|ed|ing|es|ation)', 'neutralis(?:e|ed|ing|es|ation)'),
	    ('hydrolyzed', 'hydrolysed'),
	    ('standardiz(?:e|ed|ing|es|ation)', 'standardis(?:e|ed|ing|es|ation)'),
	    ('ioniz(?:e|ed|ing|es|ation)', 'ionis(?:e|ed|ing|es|ation)'),
	    ('solubiliz(?:e|ed|ing|es|ation)', 'solubilis(?:e|ed|ing|es|ation)'),
	    ('functionalized', 'functionalised'),
	    ('electrolyzed', 'electrolysed'),
	    ('homogeniz(?:e|ed|ing|es|ation)', 'homogenis(?:e|ed|ing|es|ation)'),
	    ('lyophiliz(?:e|ed|ing|es|ation)', 'lyophilis(?:e|ed|ing|es|ation)'),
	    ('polariz(?:e|ed|ing|es|ation)', 'polaris(?:e|ed|ing|es|ation)'),
	    ('isomeriz(?:e|ed|ing|es|ation)', 'isomeris(?:e|ed|ing|es|ation)'),
	    ('immobiliz(?:e|ed|ing|es|ation)', 'immobilis(?:e|ed|ing|es|ation)'),
	    ('stabiliz(?:e|ed|ing|es|ation)', 'stabilis(?:e|ed|ing|es|ation)'),
	    ('optimiz(?:e|ed|ing|es|ation)', 'optimis(?:e|ed|ing|es|ation)'),
	    ('odor', 'odour'),
	    ('galvaniz(?:e|ed|ing|es|ation)', 'galvanis(?:e|ed|ing|es|ation)'),
	    # Only the inflected forms differ between US and UK ("modeling" vs
	    # "modelling"); bare "model(s)" / "label(s)" are common to both and must
	    # not count as a US spelling.
	    ('(?:re)?model(?:ing|ed)', '(?:re)?modell(?:ing|ed)'),
	    ('(?:re)?label(?:ing|ed)', '(?:re)?labell(?:ing|ed)'),
	    ('gray', 'grey'),
	)

	# Compiled once at import time instead of on every call.
	_COMPILED_SPELLING_PAIRS = tuple(
	    (
	        re.compile(r'\b' + us + r'\b', re.I),
	        re.compile(r'\b' + uk + r'\b', re.I),
	    )
	    for us, uk in _SPELLING_PAIRS
	)

	# A standalone integer from 1900 to 2100.
	_YEAR_PATTERN = re.compile(r'\b(19\d{2}|20\d{2}|2100)\b')
	_YEAR_WINDOW = 200      # characters inspected after each spelling match
	_CONTEXT_CHARS = 20     # characters of context shown on each side of an example
	_MAX_EXAMPLES = 3       # examples listed per spelling variant


	def _matches_without_trailing_year(regex, text):
	    """Return the matches of ``regex`` in ``text`` with no year in the following window."""
	    return [
	        match for match in regex.finditer(text)
	        if not _YEAR_PATTERN.search(text[match.end():match.end() + _YEAR_WINDOW])
	    ]


	def _format_examples(matches, text):
	    """Render up to ``_MAX_EXAMPLES`` matches as bullet lines with surrounding context."""
	    return "".join(
	        f" • ...{text[max(0, m.start() - _CONTEXT_CHARS):m.end() + _CONTEXT_CHARS]}...\n"
	        for m in matches[:_MAX_EXAMPLES]
	    )


	def check_US_UK_consistency(text):
	    """
	    Report words that appear in both their US and their UK spelling.

	    Each spelling pair is searched case-insensitively on word boundaries.
	    A match is ignored when an integer between 1900 and 2100 appears within
	    200 characters after it. A pair is reported only when at least one US
	    and one UK match survive that filter.

	    Args:
	        text (str): The string to search through. Empty or None yields no issues.

	    Returns:
	        list: One formatted message per inconsistent pair (with up to three
	        context examples for each variant), or an empty list if none.
	    """
	    if not text:
	        return []

	    issues = []
	    for us_regex, uk_regex in _COMPILED_SPELLING_PAIRS:
	        us_hits = _matches_without_trailing_year(us_regex, text)
	        if not us_hits:
	            continue
	        uk_hits = _matches_without_trailing_year(uk_regex, text)
	        if not uk_hits:
	            continue

	        issues.append(
	            "Inconsistent UK/US spelling detected:\n\n"
	            "US spelling examples:\n"
	            + _format_examples(us_hits, text)
	            + "\nUK spelling examples:\n"
	            + _format_examples(uk_hits, text)
	            + "\n→ Reminder: Maintain consistent spelling throughout the manuscript!"
	        )

	    return issues



	def transform_citations(text, journal_patterns=None):
	    """
	    Rewrite citations from "Journal Vol(Issue), Pages (Year)" to
	    "Journal Year, Vol(Issue), Pages".

	    Only the citation itself is replaced; any punctuation that follows it
	    (for example a closing period) is left in place exactly once.

	    Args:
	        text (str): The text to process. Empty or None is returned unchanged.
	        journal_patterns (list[str] | None): Regex fragments, one per journal
	            abbreviation. Defaults to a built-in list of common journals.
	            An empty list disables the transformation.

	    Returns:
	        str: The text with every recognised citation reordered.
	    """
	    if journal_patterns is None:
	        # Default patterns for common journals
	        journal_patterns = [
	            r'J\. Am\. Chem\. Soc\.',
	            r'Chem\. Eur\. J\.',
	            r'Angew\. Chem\. Int\. Ed\.',
	            r'ACS Catal\.',
	            r'Org\. Lett\.',
	            r'Tetrahedron Lett\.',
	            # Add more journal patterns as needed
	        ]

	    # With no journals the alternation would be empty and match any
	    # "Vol, Pages (Year)" fragment, so there is nothing sensible to do.
	    if not text or not journal_patterns:
	        return text

	    # Each fragment is isolated in its own non-capturing group so a "|"
	    # inside a caller-supplied fragment cannot leak into the alternation.
	    # Named groups keep the replacement correct even if a fragment
	    # contains capturing groups of its own.
	    journal_group = "(?P<journal>" + "|".join(f"(?:{p})" for p in journal_patterns) + ")"
	    # Volume with an optional issue in parentheses, e.g. "12(3)".
	    volume_group = r'(?P<volume>\d+(?:\(\d+\))?)'
	    # Single page or a hyphen / en-dash page range.
	    pages_group = r'(?P<pages>\d+(?:[-–]\d+)?)'
	    year_group = r'\((?P<year>\d{4})\)'

	    pattern = re.compile(
	        journal_group + r'\s+' + volume_group + r',\s*' + pages_group + r'\s*' + year_group
	    )

	    return pattern.sub(r'\g<journal> \g<year>, \g<volume>, \g<pages>', text)


	def validate_citation(text):
	    """
	    Check citations of the form "<Journal> <Year>, <Volume>" for a plausible
	    year/volume combination.

	    For each supported journal, Year - Volume must equal a fixed offset
	    (the year before volume 1 appeared). The year must be a four-digit
	    number, so a citation still written as "<Journal> <Vol>, <Pages> (<Year>)"
	    is not misread with the volume in the year position.

	    Args:
	        text (str): The text to scan. Empty or None yields no results.

	    Returns:
	        list: One message per citation whose year and volume do not agree,
	        in order of appearance; empty if all recognised citations are
	        consistent.
	    """
	    # Journal abbreviation -> (year - volume) offset.
	    journal_offsets = {
	        "J. Am. Chem. Soc.": 1878,
	        "Org. Lett.": 1998,
	        "Chem. Eur. J.": 1994,
	        "ACS Catal.": 2010,
	        "Angew. Chem. Int. Ed.": 1961,
	        "Tetrahedron Lett.": 1959,
	    }

	    if not text:
	        return []

	    # Longer names first so a longer abbreviation wins over any shorter
	    # one that happens to be its prefix.
	    sorted_journals = sorted(journal_offsets, key=len, reverse=True)
	    journals_regex = "|".join(re.escape(name) for name in sorted_journals)

	    # Groups: 1 = journal, 2 = four-digit year, 3 = volume.
	    pattern = re.compile("(" + journals_regex + r")\s+(\d{4}),\s*(\d+)")

	    results = []
	    for match in pattern.finditer(text):
	        journal = match.group(1)
	        year = int(match.group(2))
	        volume = int(match.group(3))

	        # The journal was matched from journal_offsets' own keys, so the
	        # lookup cannot miss.
	        offset = journal_offsets[journal]
	        if year - volume != offset:
	            results.append(
	                f"{journal} {year}, {volume}: wrong year or volume (expected offset {offset})"
	            )

	    return results


	# Ordered (regex, message) rules applied by check_text. A sequence of pairs
	# is used rather than a dict so that a repeated pattern can never silently
	# replace an earlier rule's message.
	_TEXT_CHECK_RULES = (
	    (r'\b(\S+\s+\d+(?:\.\d+)?\s+oC\b)', "Use the ° symbol in °C, not a superscripted o: "),
	    (r'\b\d+(?:\.\d+)?\s+%\s+yield\b', "No space between the numeric value and %: "),
	    (r'\b\d+(?:\.\d+)?\s*mg/ml\b', "The volume is specified in mL, not ml: "),
	    (r'\b\d+(?:\.\d+)?\s+ml\b', "The volume is specified in mL, not ml: "),
	    (
	        r'\b(?:one|two|three)(?!-)\s+neck(?:ed)?\b|\b(?:round|flat)(?!-)\s+bottom\b|\bpear(?!-)\s+shaped\b',
	        "Hyphenate 'one-necked' and 'round-bottom, e.g. one-necked round-bottom flask): ",
	    ),
	    # "m?L" accepts both "mL" and "L" after the hyphen.
	    (r'\b\d+(?:\.\d+)?[-]\s*m?L\s+round\b', "No hyphen around L and mL"),
	    (r'\banti-bacterial\b', "Use 'antibacterial' without hyphen: "),
	    (r'\bco-operation\b', "Use 'cooperation' without hyphen: "),
	    (r'\bmicro-organism\b', "Use 'microorganism' without hyphen: "),
	    (r'\bmulti-colored\b', "Use 'multicolored' without hyphen: "),
	    (r'\bnon-polar\b', "Use 'nonpolar' without hyphen: "),
	    (r'\bphoto-redox\b', "Use 'photoredox' without hyphen: "),
	    (r'\bpre-cooled\b', "Use 'precooled' without hyphen: "),
	    (r'\bsuper-acid\b', "Use 'superacid' without hyphen: "),
	    (r'\bmembered-ring\b', "Use 'membered ring' without hyphen: "),
	    (r'\bMembered-Ring\b', "Use 'Membered Ring' without hyphen: "),
	    (r'\bBronsted acid\b', "Use ø in Brønsted: "),
	    (r'X-Ray', "Always use lowercase r in X-ray (even when capitalized): "),
	    (r'x ray', "Use X and hyphen in X-ray: "),
	    (r'X ray', "Use hyphen in X-ray: "),
	    (r'\(-\)-', "Use (–)- instead of "),
	    (r'\b\d+(?:\.\d+)?mL\b', "Missing space between value and mL: "),
	    (r'\b\d+(?:\.\d+)?µm\b', "Missing space between value and µm: "),
	    (r'\b\d+(?:\.\d+)?mm\b', "Missing space between value and mm: "),
	    (r'\b\d+(?:\.\d+)?cm\b', "Missing space between value and cm: "),
	    (r'\b\d+(?:\.\d+)?mg\b', "Missing space between value and mg: "),
	    (r'\b\d+(?:\.\d+)?min\b', "Missing space between value and min: "),
	    (r'(?<!\[)\b\d+(?:\.\d+)?M\b', "Missing space if M means molar (concentration): "),
	    (r'\b\d+(?:\.\d+)?mM\b', "Missing space between value and mM: "),
	    (r'\b\d+(?:\.\d+)?μM\b', "Missing space between value and μM: "),
	    (r'\b(?!1[45]N)(\d+(?:\.\d+)?)N\b', "Missing space if N means normal (concentration): "),
	    (r'\b\d+(?:\.\d+)?K\b', "Missing space if K means Kelvin: "),
	    (
	        r'\b\d+,\d+(?=\s?(?:g|mg|mol|mmol|M|h|min|°C|mL)\b)',
	        "Incorrect use of a comma instead of a decimal point",
	    ),
	    (r',\s*\d+\.\s+\d+(?=\s?(?:g|mg|mol|mmol|h|min|°C|mL)\b)', "Unintended space? "),
	    (
	        r'(?<![a-zA-Z0-9])([‒−–-]\d+(?:\.\d+)?)\s[‒−–-]{1,2}\s([‒−–-]\d+(?:\.\d+)?)(?![a-zA-Z0-9])',
	        "Use '–a.b to –c.d' for negative numeric ranges: ",
	    ),
	    (r'\b(\d+(?:\.\d+)?)-(\d+(?:\.\d+)?)\s* °C\b', "Use en dash (–) for temperature ranges: "),
	    (r'\b(\d+(?:\.\d+)?)-(\d+(?:\.\d+)?)\s* g\b', "Use en dash (–) for mass ranges: "),
	    (r'\b(\d+(?:\.\d+)?)-(\d+(?:\.\d+)?)\s* mg\b', "Use en dash (–) for mass ranges: "),
	    (
	        r'from\s+(\d+(?:\.\d+)?)\s[–—]\s(\d+(?:\.\d+)?(?:\s[A-Za-z°])?)\b',
	        "Do not use en dash in a 'from X—Y' construction. Use 'from X to Y' instead: ",
	    ),
	    (
	        r'between\s+(\d+(?:\.\d+)?)\s[–—]\s(\d+(?:\.\d+)?(?:\s[A-Za-z°])?)\b',
	        "Do not use en dash in a 'between X—Y' construction. Use 'between X and Y' instead: ",
	    ),
	    (r'\b\d+\s+fold\b', "Hyphenate numeral and 'fold': "),
	    (r'\b\d+(?:\.\d+)?°C\b', "Missing space between value and °C: "),
	    (r'\b\d+(?:\.\d+)?° K\b', "Use K without °, e.g. 298 K: "),
	    (r'\b\d+(?:\.\d+)?±\d+(?:\.\d+)?\b', "Missing spaces around the ± symbol: "),
	    (r'\b\d+(?:\.\d+)?\s*uL\b', "Use μL instead of uL for microliters: "),
	    (r'\b\d+(?:\.\d+)?\s*ug\b', "Use μg instead of ug for micrograms: "),
	    (r'\b\d+(?:\.\d+)?\s*umol\b', "Use μmol instead of umol for micromol: "),
	    (r'\b\d+(?:\.\d+)?\s*uM\b', "Use μM instead of uM for micromolar: "),
	    (r'\b\d+(?:\.\d+)?ppm\b', "Missing space between value and ppm: "),
	    (r'\b\d+(?:\.\d+)?bar\b', "Missing space between value and bar: "),
	    (r'\b\d+(?:\.\d+)?mbar\b', "Missing space between value and mbar: "),
	    (r'\b\d+(?:\.\d+)?\s*mol/l\b', "Use mol/L instead of mol/l: "),
	    (r'\b\d+(?:\.\d+)?\s*g/l\b', "Use g/L instead of g/l: "),
	    (r'\b\d+(?:\.\d+)?\s*mol·l–1\b', "Use mol·L⁻¹ instead of mol·l⁻¹: "),
	    (r'\b\d+(?:\.\d+)?\s*g·l–1\b', "Use g·L⁻¹ instead of g·l⁻¹: "),
	    (r'\b\d+(?:\.\d+)?\s*mhz\b', "Use MHz (capital H): "),
	    (r'\b\d+(?:\.\d+)?\s*gr\b', "Use g instead of gr: "),
	    (r'\b\d+(?:\.\d+)?\s*hrs?\b', "Use h instead of hr/hrs: "),
	    # Common misspellings. Python regexes take no /.../ delimiters, and
	    # each pattern must spell the *wrong* form so correct text is not flagged.
	    (r'[Ee]natio', "Misspelling of enantio...: "),
	    (r'[Aa]symetr', "Misspelling of asymmetr...: "),
	    # Skip "napthal...", which the naphth rule below reports instead.
	    (r'(?<![Nn]a)[Pp]thal', "Misspelling of phthal...: "),
	    (r'[Nn]apth', "Misspelling of naphth...: "),
	    (r'[Ss]terosel', "Misspelling of stereosel...: "),
	    (r'\s+(-?\d+(\.\d+)?)\s+eq\.(?!\s*\d)', "Use 'equiv' for equivalents and 'eq.' for equation: "),
	    (r'\s+(-?\d+(\.\d+)?)\s+eq\)(?!\s*\d)', "Use 'equiv' for equivalents: "),
	    (r"[Cc]alc['´](?:d|ed)", "Use Calcd or calcd instead of "),
	    (r'treated with', "Check if 'reacted/washed/extracted with' etc. is more appropriate than "),

	    # Joining names
	    (r'Diels-Alder', "Use en dash (–) for Diels–Alder: "),
	    (r'Bednorz-Müller', "Use en dash (–) for Bednorz–Müller: "),
	    (r'Beer-Lambert', "Use en dash (–) for Beer–Lambert: "),
	    (r'Bose-Einstein', "Use en dash (–) for Bose–Einstein: "),
	    (r'Debye-Hückel', "Use en dash (–) for Debye–Hückel: "),
	    (r'Fermi-Dirac', "Use en dash (–) for Fermi–Dirac: "),
	    (r'Fischer-Tropsch', "Use en dash (–) for Fischer–Tropsch: "),
	    (r'Fisher-Johns', "Use en dash (–) for Fisher–Johns: "),
	    (r'Flory-Huggins', "Use en dash (–) for Flory–Huggins: "),
	    (r'Franck-Condon', "Use en dash (–) for Franck–Condon: "),
	    (r'Friedel-Crafts', "Use en dash (–) for Friedel–Crafts: "),
	    (r'Geiger-Müller', "Use en dash (–) for Geiger–Müller: "),
	    (r'Henderson-Hasselbalch', "Use en dash (–) for Henderson–Hasselbalch: "),
	    (r'Jahn-Teller', "Use en dash (–) for Jahn–Teller: "),
	    (r'Lee-Yang-Parr', "Use en dash (–) for Lee–Yang–Parr: "),
	    (r'Lineweaver-Burk', "Use en dash (–) for Lineweaver–Burk: "),
	    (r'Mark-Houwink', "Use en dash (–) for Mark–Houwink: "),
	    (r'Meerwein-Ponndorf', "Use en dash (–) for Meerwein–Ponndorf: "),
	    (r'Michaelis-Menten', "Use en dash (–) for Michaelis–Menten: "),
	    (r'Stern-Volmer', "Use en dash (–) for Stern–Volmer: "),
	    (r"van't Hoff-Le Bel", "Use en dash (–) for van't Hoff–Le Bel: "),
	    (r'Wolff-Kishner', "Use en dash (–) for Wolff–Kishner: "),
	    (r'Young-Laplace', "Use en dash (–) for Young–Laplace: "),
	    (r'Ziegler-Natta', "Use en dash (–) for Ziegler–Natta: "),
	    (r'Baeyer-Villiger', "Use en dash (–) for Baeyer–Villiger: "),
	    (r'Schotten-Baumann', "Use en dash (–) for Schotten–Baumann: "),
	    (r'Buchwald-Hartwig', "Use en dash (–) for Buchwald–Hartwig: "),
	    (r'Kumada-Corriu', "Use en dash (–) for Kumada–Corriu: "),
	    (r'Nozaki-Hiyama', "Use en dash (–) for Nozaki–Hiyama: "),
	    (r'Suzuki-Miyaura', "Use en dash (–) for Suzuki–Miyaura: "),
	    (r'Mizoroki-Heck', "Use en dash (–) for Mizoroki–Heck: "),
	    (r'Wittig-Horner', "Use en dash (–) for Wittig–Horner: "),
	    (r'Claisen-Schmidt', "Use en dash (–) for Claisen–Schmidt: "),
	    (r'Stille-Kelly', "Use en dash (–) for Stille–Kelly: "),
	    (r'Reformatsky-Claisen', "Use en dash (–) for Reformatsky–Claisen: "),
	    (r'Sonogashira-Hagihara', "Use en dash (–) for Sonogashira–Hagihara: "),
	    (r'Grubbs-Hoveyda', "Use en dash (–) for Grubbs–Hoveyda: "),
	    (r'Hoveyda-Grubbs', "Use en dash (–) for Hoveyda–Grubbs: "),
	    (r'Petasis-Ferrier', "Use en dash (–) for Petasis–Ferrier: "),
	    (r'Mukaiyama-Michael', "Use en dash (–) for Mukaiyama–Michael: "),
	    (r'Tsuji-Trost', "Use en dash (–) for Tsuji–Trost: "),
	    (r'Horner-Wadsworth-Emmons', "Use en dash (–) for Horner–Wadsworth–Emmons: "),
	    (r'Jorgensen-Hayashi', "Use en dash (–) and ø in Jørgensen–Hayashi: "),
	    (r'Jørgensen-Hayashi', "Use en dash (–) in Jørgensen–Hayashi: "),
	    (r'Ullmann-Goldberg', "Use en dash (–) for Ullmann–Goldberg: "),
	    (r'Chan-Lam', "Use en dash (–) for Chan–Lam: "),
	    (r'Hiyama-Denmark', "Use en dash (–) for Hiyama–Denmark: "),
	    (r'Negishi-Brown', "Use en dash (–) for Negishi–Brown: "),
	    (r'Corey-Fuchs', "Use en dash (–) for Corey–Fuchs: "),
	    (r'Wacker-Tsuji', "Use en dash (–) for Wacker–Tsuji: "),
	    (r'Stork-Danheiser', "Use en dash (–) for Stork–Danheiser: "),
	    (r'Balz-Schiemann', "Use en dash (–) for Balz–Schiemann: "),
	    (r'Barton-McCombie', "Use en dash (–) for Barton–McCombie: "),
	    (r'Knoevenagel-Doebner', "Use en dash (–) for Knoevenagel–Doebner: "),
	    (r'Gattermann-Koch', "Use en dash (–) for Gattermann–Koch: "),
	    (r'Mukaiyama-Mannich', "Use en dash (–) for Mukaiyama–Mannich: "),
	    (r'Evans-Tishchenko', "Use en dash (–) for Evans–Tishchenko: "),

	    (r'\b\d+(?:\.\d+)?\s*degree\b', "Use ° instead of 'degree': "),
	    (r'\b\d+(?:\.\d+)?\s*percent\b', "Use % instead of 'percent': "),
	    (r'\bvacuumed\b', "Use 'evacuated' or 'under vacuum' instead of 'vacuumed': "),

	    (r'\b[Hh]eated\s+up\b', "Omit 'up', use 'heated': "),
	    (r'\b[Cc]ooled\s+down\b', "Omit 'down', use 'cooled': "),
	    (r'\b[Ww]armed\s+up\b', "Omit 'up', use 'warmed': "),
	    (r'\b[Aa]bsorbed\s+on\b', "Check whether 'adsorbed on' is more appropriate: "),

	    (r'(\d)\sx\s(\d)', "Use × operator instead of letter x for multiplication "),
	    (r'->', "Use → instead of ->"),
	    (r' MIN ', "Use min for minutes"),
	    (r'vaccuum', "Misspelling of vacuum "),
	    (r'reduced vacuum', "Use 'reduced pressure' "),
	    (r'(\d)×(\d)', "Leave space before and after × operator "),
	    (r'mol×L', "Use mol·L "),
	    (r'g×mol', "Use g·mol "),
	    (r'J×K', "Use J·K "),
	    (r'J×mol', "Use J·mol "),
	    (r'g×L', "Use g·L "),
	    # Pattern now matches what its message describes (was a second 'g×L').
	    (r'mg×mL', "Use mg·mL "),
	    (r'mol\.L-1', "Use mol·L-1 "),
	    (r'mol\.mL', "Use mol·mL "),
	    (r'g\.mol', "Use g·mol "),
	    (r'J\.K', "Use J·K "),
	    (r'J\.mol', "Use J·mol "),
	    (r'g\.L-1', "Use g·L-1 "),
	    # Pattern now matches what its message describes (was a second 'g\.L-1').
	    (r'mg\.mL-1', "Use mg·mL-1 "),

	    # Stereochemistry
	    (r'relative stereochemistry', "Use 'relative configuration' "),
	    (r'absolute stereochemistry', "Use 'absolute configuration' "),
	    (r'assigned stereochemistry', "Use assigned configuration "),
	    (r'he stereochemistry of', "the configuration of... might be better"),
	    (r'he stereochemistry was', "the configuration was... might be better"),
	    (r'he stereochemistry is', "the configuration is... might be better"),
	    (r'of stereochemistry', "of the configuration... might be better"),
	)

	# Compiled once at import time; check_text only iterates.
	_COMPILED_TEXT_CHECKS = tuple(
	    (re.compile(pattern_str), message) for pattern_str, message in _TEXT_CHECK_RULES
	)


	def check_text(text):
	    """
	    Scan the text for formatting and style problems and report each hit
	    together with a short excerpt of the surrounding text.

	    Rules are applied in their declared order; within a rule, hits are
	    reported in order of appearance.

	    Args:
	        text (str): The text to check. Empty or None yields no results.

	    Returns:
	        list: Strings of the form "<message>...<context>...", where the
	        context spans up to 5 characters before and 10 characters after
	        the matched text.
	    """
	    if not text:
	        return []

	    results = []
	    for regex, message in _COMPILED_TEXT_CHECKS:
	        for match in regex.finditer(text):
	            start = max(0, match.start() - 5)  # up to 5 chars before the match
	            end = match.end() + 10  # up to 10 chars after; slicing clamps at the end
	            results.append(f"{message}...{text[start:end]}...")

	    return results


	def extract_text_from_pdf(pdf_file):
	    """
	    Extract text from a PDF file using fitz (PyMuPDF).

	    The text of every page is concatenated in page order, each page
	    followed by a single space. Any failure is logged, shown to the user
	    via ``st.error``, and reported to the caller as an empty string.

	    Args:
	        pdf_file: The uploaded PDF file object; only its ``getvalue()``
	            method is used, to obtain the raw PDF bytes.

	    Returns:
	        str: The extracted text from the PDF, or "" if extraction failed.
	    """
	    try:
	        # Create a byte stream from the uploaded file
	        pdf_bytes = io.BytesIO(pdf_file.getvalue())

	        # The context manager closes the document even if a page fails
	        # to render its text.
	        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
	            return "".join(page.get_text() + " " for page in doc)
	    except Exception as e:
	        # UI boundary: surface the problem instead of crashing the app,
	        # but keep the traceback in the log.
	        logging.exception("Error extracting text from PDF")
	        st.error(f"Error extracting text from PDF: {str(e)}")
	        return ""

	def main():
	    """
	    Build the Streamlit page: a title and introduction, then two tabs that
	    feed either an uploaded PDF or pasted text into ``analyze_content``.
	    """
	    st.set_page_config(page_title="Chemistry Text Analyzer", page_icon="🧪", layout="wide")
	    st.markdown(
	        """
	        <style>
	        div.block-container {
	        overflow-y: auto !important;
	        }
	        iframe {
	        overflow: visible !important;
	        }
	        </style>
	        """, unsafe_allow_html=True
	    )
	    st.title("Chemistry Text Analyzer")
	    st.write("""
	    This app analyzes chemistry text for common errors, inconsistencies, and formatting issues.
	    Upload a PDF file or paste your text in the box below to analyze it.
	    """)

	    # Create tabs for different input methods
	    tab1, tab2 = st.tabs(["Upload PDF", "Text Input"])

	    with tab1:
	        uploaded_file = st.file_uploader("Choose a PDF file", type=['pdf'])
	        analyze_pdf = st.button("Analyze PDF")
	        if analyze_pdf:
	            if uploaded_file is None:
	                # Mirror the text tab: tell the user why nothing happened.
	                st.warning("Please upload a PDF file to analyze.")
	            else:
	                with st.spinner("Extracting text from PDF..."):
	                    text_content = extract_text_from_pdf(uploaded_file)
	                # A PDF without a text layer yields only whitespace; treat
	                # that as a failed extraction rather than as content.
	                if text_content.strip():
	                    st.success(f"Successfully extracted text from {uploaded_file.name}")
	                    st.write("---")
	                    analyze_content(text_content)
	                else:
	                    st.error("Failed to extract text from the PDF. Please check if the PDF contains extractable text.")

	    with tab2:
	        # Text input area
	        text_input = st.text_area("Paste your text here:", height=300)
	        analyze_text = st.button("Analyze Text")
	        if analyze_text:
	            if not text_input.strip():
	                st.warning("Please paste some text to analyze.")
	            else:
	                st.write("---")
	                # Replace newlines with spaces to match the original behavior
	                text_content = text_input.replace('\n', ' ')
	                analyze_content(text_content)

	def analyze_content(text_content):
	    """
	    Analyze the text content and display results.

	    Runs the style checks, the US/UK spelling check, and the citation
	    checks, rendering each group in its own expander, then reports the
	    elapsed time.

	    Args:
	        text_content: The text to analyze
	    """
	    # perf_counter is monotonic, so the elapsed time cannot be distorted
	    # by system clock adjustments.
	    start_time = time.perf_counter()

	    with st.spinner("Analyzing text..."):
	        # Use expanders for each analysis type to keep the UI clean
	        with st.expander("Text Format and Style Issues", expanded=True):
	            text_issues = check_text(text_content)
	            if text_issues:
	                for issue in text_issues:
	                    st.write(issue)
	            else:
	                st.write("No text format or style issues found.")

	        # Language Issues - Separate expander, not nested
	        with st.expander("Language Issues", expanded=True):
	            language_issues = check_US_UK_consistency(text_content)
	            if language_issues:
	                for issue in language_issues:
	                    st.markdown(issue)
	            else:
	                st.write("No US/UK spelling inconsistencies found.")

	        with st.expander("Citation Analysis", expanded=True):
	            # Transform citations (this returns the transformed text)
	            transformed_text = transform_citations(text_content)
	            if transformed_text != text_content:
	                st.write("Citations were transformed to the proper format.")

	            # Validate the transformed text: validate_citation expects the
	            # "<Journal> <Year>, <Volume>" layout, which is exactly what
	            # transform_citations produces.
	            citation_issues = validate_citation(transformed_text)
	            if citation_issues:
	                st.write("Citation issues found:")
	                for issue in citation_issues:
	                    st.write(issue)
	            else:
	                st.write("No citation issues found.")

	    elapsed_time = time.perf_counter() - start_time
	    st.write(f"Analysis completed in {elapsed_time:.2f} seconds.")


	# Start the Streamlit UI only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    main()