Spaces:

Hoctar77
/

DocumentCheckerTool

Sleeping

App Files Files Community

Hoctar77 committed on Oct 29, 2024

Commit

c6ba992

verified ·

1 Parent(s): ee7296b

Update app.py

Browse files

Files changed (1) hide show

app.py +438 -231

app.py CHANGED Viewed

@@ -6,279 +6,486 @@ import io
 import traceback
 def heading_title_check(doc, required_headings):
-    """Check if all required headings are present."""
-    headings_found = []
-    try:
-        # Iterate through paragraphs to find headings
-        for paragraph in doc.paragraphs:
-            if paragraph.style.name.startswith('Heading'):
-                headings_found.append(paragraph.text.strip())
-    except Exception as e:
-        print(f"Error in heading check: {str(e)}")
-        return False, []
-    # Check if all required headings are present
-    all_present = all(heading in headings_found for heading in required_headings)
-    return all_present, headings_found
-def acronym_check(doc):
-    """Check if all acronyms are properly defined."""
-    undefined_acronyms = set()
-    defined_acronyms = set()
-    try:
-        # Regular expression for finding acronyms (2-5 capital letters)
-        acronym_pattern = r'\b[A-Z]{2,5}\b'
-        # Check each paragraph
-        for paragraph in doc.paragraphs:
-            text = paragraph.text
-            # Find all acronyms in this paragraph
-            acronyms = re.findall(acronym_pattern, text)
-            for acronym in acronyms:
-                if acronym not in defined_acronyms:
-                    # Look for definition pattern: "full term (ACRONYM)"
-                    definition_pattern = rf'.+\({acronym}\)'
-                    if not any(re.search(definition_pattern, p.text) for p in doc.paragraphs):
-                        undefined_acronyms.add(acronym)
-                    else:
-                        defined_acronyms.add(acronym)
-    except Exception as e:
-        print(f"Error in acronym check: {str(e)}")
-        return False, []
-    return len(undefined_acronyms) == 0, list(undefined_acronyms)
 def legal_check(doc):
-    """Check if legal terminology is used correctly."""
     incorrect_legal_references = []
-    try:
-        # Define legal terminology mapping
-        legal_terms = {
-            "C.F.R.": "Code of Federal Regulations",
-            "F.R.": "Federal Register",
-            "U.S.C.": "United States Code"
-        }
-        # Check each paragraph
-        for paragraph in doc.paragraprams:
-            text = paragraph.text
-            for incorrect_term, correct_term in legal_terms.items():
-                if incorrect_term in text and correct_term not in text:
-                    incorrect_legal_references.append((incorrect_term, correct_term))
-    except Exception as e:
-        print(f"Error in legal check: {str(e)}")
-        return False, []
     return len(incorrect_legal_references) == 0, incorrect_legal_references
 def table_caption_check(doc, doc_type):
-    """Check if table captions are formatted correctly."""
-    incorrect_captions = []
-    try:
-        # Check table captions
-        for table in doc.tables:
-            # Get the paragraph before the table
-            prev_paragraph = table._element.getprevious()
-            if prev_paragraph is not None and prev_paragraph.text.startswith("Table"):
-                # Check if the caption is formatted correctly
-                if doc_type == "Advisory Circular":
-                    # AC captions should be "Table X. Caption text"
-                    if not prev_paragraph.text.startswith("Table ") or ". " not in prev_paragraph.text:
-                        incorrect_captions.append(prev_paragraph.text)
-                else:
-                    # Other doc types may have different caption formats
-                    pass
-    except Exception as e:
-        print(f"Error in table caption check: {str(e)}")
-        return False, []
     return len(incorrect_captions) == 0, incorrect_captions
 def figure_caption_check(doc, doc_type):
-    """Check if figure captions are formatted correctly."""
-    incorrect_fig_captions = []
-    try:
-        # Check figure captions
-        for paragraph in doc.paragraphs:
-            if paragraph.text.startswith("Figure"):
-                # Check if the caption is formatted correctly
-                if doc_type == "Advisory Circular":
-                    # AC captions should be "Figure X. Caption text"
-                    if ". " not in paragraph.text:
-                        incorrect_fig_captions.append(paragraph.text)
-                else:
-                    # Other doc types may have different caption formats
-                    pass
-    except Exception as e:
-        print(f"Error in figure caption check: {str(e)}")
-        return False, []
     return len(incorrect_fig_captions) == 0, incorrect_fig_captions
 def table_figure_reference_check(doc, doc_type):
-    """Check if table and figure references are formatted correctly."""
     incorrect_table_figure_references = []
-    try:
-        # Check table and figure references
-        for paragraph in doc.paragraphs:
-            text = paragraph.text
-            if "Table" in text or "Figure" in text:
-                # Check if the reference is formatted correctly
-                if doc_type == "Advisory Circular":
-                    # AC references should be "Table X" or "Figure X"
-                    if not any(text.startswith(f"{item} ") for item in ["Table", "Figure"]):
-                        incorrect_table_figure_references.append(text)
-                else:
-                    # Other doc types may have different reference formats
-                    pass
-    except Exception as e:
-        print(f"Error in table/figure reference check: {str(e)}")
-        return False, []
     return len(incorrect_table_figure_references) == 0, incorrect_table_figure_references
-def document_title_check(doc, doc_type):
-    """Check if the document title is formatted correctly."""
     incorrect_titles = []
-    try:
-        # Check the document title
-        if len(doc.paragraphs) > 0 and doc.paragraphs[0].style.name == 'Title':
-            title_text = doc.paragraphs[0].text
-            # Check the formatting based on document type
-            if doc_type == "Advisory Circular":
-                if not title_text.startswith("ADVISORY CIRCULAR ") or title_text.endswith(" AC"):
-                    incorrect_titles.append({"text": title_text, "issue": "Advisory Circular titles should start with 'ADVISORY CIRCULAR ' and end with ' AC'"})
-            elif doc_type == "Order":
-                if not title_text.startswith('"') or not title_text.endswith('"'):
-                    incorrect_titles.append({"text": title_text, "issue": "Order titles should be enclosed in quotation marks"})
-            elif doc_type == "Federal Register Notice":
-                if not title_text.startswith('"') or not title_text.endswith('"'):
-                    incorrect_titles.append({"text": title_text, "issue": "Federal Register Notice titles should be enclosed in quotation marks"})
-            elif doc_type == "Policy Statement":
-                if title_text.startswith('"') or title_text.endswith('"'):
-                    incorrect_titles.append({"text": title_text, "issue": "Policy Statement titles should not have quotation marks"})
-    except Exception as e:
-        print(f"Error in document title check: {str(e)}")
-        return False, []
     return len(incorrect_titles) == 0, incorrect_titles
-def double_period_check(doc):
-    """Check for sentences with double periods."""
-    incorrect_sentences = []
-    try:
-        # Check each paragraph for double periods
-        for paragraph in doc.paragraphs:
-            if ".." in paragraph.text:
-                incorrect_sentences.append(paragraph.text)
-    except Exception as e:
-        print(f"Error in double period check: {str(e)}")
-        return False, []
-    return len(incorrect_sentences) == 0, incorrect_sentences
 def spacing_check(doc):
-    """Check for incorrect spacing."""
     incorrect_spacing = []
-    try:
-        # Check each paragraph for spacing issues
-        for paragraph in doc.paragraphs:
-            if "  " in paragraph.text:
-                incorrect_spacing.append(paragraph.text)
-    except Exception as e:
-        print(f"Error in spacing check: {str(e)}")
-        return False, []
     return len(incorrect_spacing) == 0, incorrect_spacing
 def check_abbreviation_usage(doc):
-    """Check for consistent usage of abbreviations."""
-    abbreviation_issues = []
-    try:
-        # Regular expression to find abbreviations (2-5 capital letters)
-        abbreviation_pattern = r'\b[A-Z]{2,5}\b'
-        # Check each paragraph
-        for paragraph in doc.paragraphs:
-            text = paragraph.text
-            # Find all abbreviations in this paragraph
-            abbreviations = re.findall(abbreviation_pattern, text)
-            for abbr in abbreviations:
-                # Look for the full term definition
-                definition_pattern = rf'.+\({abbr}\)'
-                if any(re.search(definition_pattern, p.text) for p in doc.paragraphs):
-                    # Check if the abbreviation is used consistently after definition
-                    for other_paragraph in doc.paragraphs:
-                        if abbr in other_paragraph.text and definition_pattern not in other_paragraph.text:
-                            abbreviation_issues.append((definition_pattern.split('(')[0].strip(), abbr, paragraph.text))
-                            break
-    except Exception as e:
-        print(f"Error in abbreviation check: {str(e)}")
-        return []
-    return abbreviation_issues
 def check_date_formats(doc):
-    """Check for consistent date formatting."""
     date_issues = []
-    try:
-        # Look for date patterns in each paragraph
-        for paragraph in doc.paragraphs:
-            text = paragraph.text
-            if re.search(r'\b\d{1,2}/\d{1,2}/\d{4}\b', text):
-                date_issues.append((text, paragraph.text))
-    except Exception as e:
-        print(f"Error in date format check: {str(e)}")
-        return []
     return date_issues
 def check_placeholders(doc):
-    """Check for the presence of placeholders."""
-    placeholder_issues = []
-    try:
-        # Look for placeholder text in each paragraph
-        for paragraph in doc.paragraprams:
-            text = paragraph.text
-            if '[ENTER TEXT]' in text or '[ENTER DATE]' in text:
-                placeholder_issues.append((text, paragraph.text))
-    except Exception as e:
-        print(f"Error in placeholder check: {str(e)}")
-        return []
-    return placeholder_issues
-def get_document_checks(doc_type, template_type):
-    """Return the required headings and other checks based on document type."""
-    if doc_type == "Advisory Circular":
-        if template_type == "Short AC template AC":
-            return {
-                "required_headings": ["Purpose", "Applicability", "Related Reading Material",
-                                    "Background", "Discussion"]
-            }
-        else:  # Long AC template
-            return {
-                "required_headings": ["Purpose", "Applicability", "Audience", "Related Reading Material",
-                                    "Background", "Discussion", "Conclusion"]
-            }
-    # Add other document types as needed
-    return {"required_headings": []}
 def process_file(file_obj, doc_type, template_type):
     """
@@ -513,7 +720,7 @@ def format_results_for_gradio(**kwargs):
     # Placeholder Check
     results.append("## Placeholder Check")
     if not kwargs['placeholder_issues']:
-        results.append("✅ No placeholders found.\n")
     else:
         results.append("❌ Placeholders Found:")
         for phrase, paragraph in kwargs['placeholder_issues']:
@@ -554,7 +761,7 @@ demo = gr.Blocks(theme='JohnSmith9982/small_and_pretty')
 with demo:
     gr.Markdown("# Document Checker Tool")
     gr.Markdown("Upload a Word (docx) document to check for compliance with U.S. federal documentation standards.")
-    gr.Markdown("* This tool is still in development *")
     gr.Markdown("Contact Eric Putnam if you have questions and comments.")
     document_types = [

 import traceback
 def heading_title_check(doc, required_headings):
+    """
+    Check if required headings are present in the document.
+    Args:
+        doc (list): List of paragraph texts from the document
+        required_headings (list): List of required heading titles
+    Returns:
+        tuple: (bool, list) - (True if all headings present, list of found headings)
+    """
+    headings_found = []
+    # Create a set of required headings for efficient lookup
+    required_headings_set = set(required_headings)
+    for para in doc:
+        para_strip = para.strip()
+        # Check if the paragraph is in the required headings list
+        if para_strip in required_headings_set:
+            headings_found.append(para_strip)
+    # Check if all required headings are found
+    all_headings_present = set(headings_found) == required_headings_set
+    return all_headings_present, headings_found
def acronym_check(doc):
    """
    Find acronyms that are used before (or without) being defined.

    A definition is a word followed by the acronym in parentheses, e.g.
    "Administration (FAA)". Definitions in a paragraph are registered before
    that paragraph's usages are examined.

    Args:
        doc (list): Paragraph texts of the document.

    Returns:
        tuple: (bool, set) - True when nothing is undefined, and the set of
        undefined acronyms.
    """
    definition_re = re.compile(r'(\b\w+\b) \((\b[A-Z]{2,}\b)\)')
    usage_re = re.compile(r'(\b[A-Z]{2,}\b)')

    known = set()
    missing = set()
    for text in doc:
        # Group 2 of a definition match is the acronym itself.
        known.update(hit.group(2) for hit in definition_re.finditer(text))
        missing.update(token for token in usage_re.findall(text) if token not in known)
    return not missing, missing
 def legal_check(doc):
+    """Check for correct legal references in the document and suggest corrections.
+    Args:
+        doc (list): List of paragraphs/strings to check
+    Returns:
+        tuple: (bool, list) - (True if no errors found, list of (incorrect, correct) terms)
+    """
+    # Mapping of incorrect terms to their correct versions
+    incorrect_variations = {
+        r"\bUSC\b": "U.S.C.",
+        r"\bCFR Part\b": "CFR part",
+        r"\bC\.F\.R\.\b": "CFR",
+        r"\bWe\b": "The FAA",
+        r"\bwe\b": "the FAA",
+        r"\bcancelled\b": "canceled",
+        r"\bshall\b": "must or will",
+        r"\b&\b": "and"
+    }
+    # List to store tuples of incorrect terms and their correct versions
     incorrect_legal_references = []
+    for paragraph in doc:
+        # Special handling for "Title 14" / "title 14"
+        title_14_pattern = r"(?P<prefix>^|[.!?\s])\s*(?P<title>title 14|Title 14)\b"
+        matches = re.finditer(title_14_pattern, paragraph)
+        for match in matches:
+            prefix = match.group('prefix')
+            current_title = match.group('title')
+            # If it follows a sentence-ending punctuation or is at start, it should be "Title 14"
+            if prefix in ('.', '!', '?', '') and current_title.lower() == "title 14":
+                if current_title != "Title 14":
+                    incorrect_legal_references.append((current_title, "Title 14"))
+            # If it's within a sentence, it should be "title 14"
+            elif prefix.isspace() and current_title != "title 14":
+                incorrect_legal_references.append((current_title, "title 14"))
+        # Check other variations
+        for incorrect_pattern, correct_term in incorrect_variations.items():
+            matches = re.finditer(incorrect_pattern, paragraph)
+            for match in matches:
+                incorrect_legal_references.append((match.group(), correct_term))
     return len(incorrect_legal_references) == 0, incorrect_legal_references
 def table_caption_check(doc, doc_type):
+    """
+    Check for correctly formatted table captions in the document.
+    Supports both numeric (Table 1-2) and alphanumeric (Table C-1) formats.
+    """
+    if doc_type in ["Advisory Circular", "Order"]:
+        # Pattern for "Table X-Y" where X and Y can be either letters or numbers
+        table_caption_pattern = re.compile(r'^Table\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
+    else:
+        # Pattern for "Table X" where X can be either letters or numbers
+        table_caption_pattern = re.compile(r'^Table\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)
+    incorrect_captions = []
+    for paragraph in doc:
+        paragraph_strip = paragraph.strip()
+        if paragraph_strip.lower().startswith("table"):
+            if not table_caption_pattern.match(paragraph_strip):
+                incorrect_captions.append(paragraph_strip)
     return len(incorrect_captions) == 0, incorrect_captions
 def figure_caption_check(doc, doc_type):
+    """
+    Check for correctly formatted figure captions in the document.
+    Supports both numeric (Figure 1-2) and alphanumeric (Figure C-1) formats.
+    """
+    if doc_type in ["Advisory Circular", "Order"]:
+        # Pattern for "Figure X-Y" where X and Y can be either letters or numbers
+        figure_caption_pattern = re.compile(r'^Figure\s+([A-Z0-9]+)-([A-Z0-9]+)[\.\s]', re.IGNORECASE)
+    else:
+        # Pattern for "Figure X" where X can be either letters or numbers
+        figure_caption_pattern = re.compile(r'^Figure\s+([A-Z0-9]+)[\.\s]', re.IGNORECASE)
+    incorrect_fig_captions = []
+    for paragraph in doc:
+        paragraph_strip = paragraph.strip()
+        if paragraph_strip.lower().startswith("figure"):
+            if not figure_caption_pattern.match(paragraph_strip):
+                incorrect_fig_captions.append(paragraph_strip)
     return len(incorrect_fig_captions) == 0, incorrect_fig_captions
 def table_figure_reference_check(doc, doc_type):
+    """Check for incorrect references to tables and figures in the document."""
     incorrect_table_figure_references = []
+    if doc_type in ["Advisory Circular", "Order"]:
+        # For Advisory Circulars and Orders, correct references are "Table X-Y" or "Figure X-Y"
+        incorrect_table_ref_pattern = re.compile(r'\bTable\s+\d+(?!-\d+)\b', re.IGNORECASE)
+        incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+\d+(?!-\d+)\b', re.IGNORECASE)
+    else:
+        # For other document types, correct references are "Table X" or "Figure X"
+        incorrect_table_ref_pattern = re.compile(r'\bTable\s+\d+(-\d+)?\b', re.IGNORECASE)
+        incorrect_figure_ref_pattern = re.compile(r'\bFigure\s+\d+(-\d+)?\b', re.IGNORECASE)
+    for paragraph in doc:
+        paragraph_strip = paragraph.strip()
+        # Exclude captions
+        starts_with_table_or_figure = paragraph_strip.lower().startswith('table') or paragraph_strip.lower().startswith('figure')
+        if not starts_with_table_or_figure:
+            # Find incorrect table references
+            incorrect_tables = incorrect_table_ref_pattern.findall(paragraph)
+            if incorrect_tables:
+                incorrect_table_figure_references.extend(incorrect_tables)
+            # Find incorrect figure references
+            incorrect_figures = incorrect_figure_ref_pattern.findall(paragraph)
+            if incorrect_figures:
+                incorrect_table_figure_references.extend(incorrect_figures)
+    # Return False if any incorrect references are found
     return len(incorrect_table_figure_references) == 0, incorrect_table_figure_references
def document_title_check(doc_path, doc_type):
    """
    Check the formatting of titles that follow an "AC <number>" citation.

    Every paragraph is scanned for "AC <number>[-<number>]" followed by a
    title (text up to the next period, comma, or end of paragraph). The title
    is compared with the italics/quotes rule for doc_type.

    Args:
        doc_path: Passed straight to Document(); presumably a path or
            file-like object accepted by python-docx - confirm against caller.
        doc_type (str): One of the keys of the formatting table below.

    Returns:
        tuple: (bool, list) - True when no issue was found, and a list of
        {'text': matched citation, 'issue': comma-joined description} dicts.

    Raises:
        ValueError: If doc_type has no formatting rule.
    """
    incorrect_titles = []
    doc = Document(doc_path)

    # Captures the title text after "AC 20-114, " up to ".", "," or end.
    ac_pattern = re.compile(r'AC\s+\d+(?:-\d+)?(?:,|\s)+(.+?)(?=\.|,|$)')

    # Define formatting rules for different document types
    formatting_rules = {
        "Advisory Circular": {"italics": True, "quotes": False},
        "Airworthiness Criteria": {"italics": False, "quotes": True},
        "Deviation Memo": {"italics": False, "quotes": True},
        "Exemption": {"italics": False, "quotes": True},
        "Federal Register Notice": {"italics": False, "quotes": True},
        "Handbook/Manual": {"italics": False, "quotes": False},
        "Order": {"italics": False, "quotes": True},
        "Policy Statement": {"italics": False, "quotes": False},
        "Rule": {"italics": False, "quotes": True},
        "Special Condition": {"italics": False, "quotes": True},
        "Technical Standard Order": {"italics": False, "quotes": True},
        "Other": {"italics": False, "quotes": False},
    }

    # Get the rules for the current document type
    if doc_type not in formatting_rules:
        raise ValueError(f"Unsupported document type: {doc_type}")
    required_format = formatting_rules[doc_type]

    # Straight quotes plus the curly double/single quotes, written as escapes
    # so an encoding round-trip cannot flatten them into ASCII quotes again.
    # NOTE(review): "'" also matches apostrophes inside a title.
    quote_marks = ('"', "'", '\u201c', '\u201d', '\u2018', '\u2019')

    for paragraph in doc.paragraphs:
        text = paragraph.text
        for match in ac_pattern.finditer(text):
            full_match = match.group(0)
            title_text = match.group(1).strip()
            # Offset of the title within the paragraph text
            title_start = match.start(1)

            title_in_quotes = any(mark in title_text for mark in quote_marks)

            # The italic state is taken from the run containing the first
            # character of the title. Assumes the concatenated run texts line
            # up with paragraph.text - TODO confirm.
            title_is_italicized = False
            current_pos = 0
            for run in paragraph.runs:
                run_length = len(run.text)
                if current_pos <= title_start < current_pos + run_length:
                    title_is_italicized = run.italic
                    break
                current_pos += run_length

            issue_message = []

            # Check italics requirement
            if required_format["italics"] and not title_is_italicized:
                issue_message.append("should be italicized")
            elif not required_format["italics"] and title_is_italicized:
                issue_message.append("should not be italicized")

            # Check quotes requirement
            if required_format["quotes"] and not title_in_quotes:
                issue_message.append("should be in quotes")
            elif not required_format["quotes"] and title_in_quotes:
                issue_message.append("should not be in quotes")

            if issue_message:
                incorrect_titles.append({
                    'text': full_match,
                    'issue': ', '.join(issue_message)
                })

    return len(incorrect_titles) == 0, incorrect_titles
def get_document_checks(doc_type, template_type):
    """
    Look up the required headings for a document type.

    Advisory Circulars are keyed a second time by template_type; every other
    document type ignores template_type. The request and the result are
    logged at INFO level.

    Returns:
        dict: {"required_headings": [...]} for a known combination, or an
        empty dict when the document type (or AC template) is unknown.
    """
    def headings(*titles):
        # Each call yields a fresh list so entries never share state.
        return {"required_headings": list(titles)}

    not_researched = "TBD - Need to research"

    catalogue = {
        "Advisory Circular": {
            "Short AC template AC": headings(
                "PURPOSE.",
                "APPLICABILITY.",
                "CANCELLATION.",
                "RELATED MATERIAL.",
                "DEFINITION OF KEY TERMS.",
            ),
            "Long AC template AC": headings(
                "Purpose.",
                "Applicability.",
                "Cancellation.",
                "Related Material.",
                "Definition of Key Terms.",
            ),
        },
        "Airworthiness Criteria": headings(not_researched),
        "Deviation Memo": headings(not_researched),
        "Exemption": headings(not_researched),
        "Federal Register Notice": headings(
            "Purpose of This Notice",
            "Audience",
            "Where can I Find This Notice",
        ),
        "Handbook/Manual": headings(not_researched),
        "Order": headings(
            "Purpose of This Order.",
            "Audience.",
            "Where to Find This Order.",
        ),
        "Policy Statement": headings(
            "SUMMARY",
            "CURRENT REGULATORY AND ADVISORY MATERIAL",
            "RELEVANT PAST PRACTICE",
            "POLICY",
            "EFFECT OF POLICY",
            "CONCLUSION",
        ),
        "Rule": headings(not_researched),
        "Special Condition": headings(not_researched),
        "Technical Standard Order": headings(
            "PURPOSE.",
            "APPLICABILITY.",
            "REQUIREMENTS.",
            "MARKING.",
            "APPLICATION DATA REQUIREMENTS.",
            "MANUFACTURER DATA REQUIREMENTS.",
            "FURNISHED DATA REQUIREMENTS.",
            "HOW TO GET REFERENCED DOCUMENTS.",
        ),
        "Other": headings("N/A"),
    }

    logger = logging.getLogger(__name__)
    logger.info(f"Requested document type: {doc_type}")
    logger.info(f"Requested template type: {template_type}")

    entry = catalogue.get(doc_type, {})
    # Only the Advisory Circular entry is nested by template.
    checks = entry.get(template_type, {}) if doc_type == "Advisory Circular" else entry

    logger.info(f"Retrieved checks: {checks}")
    return checks
def double_period_check(doc):
    """
    Collect sentences that end with two periods.

    Each paragraph is split after ".", "!" or "?" when followed by one or more
    spaces; a piece ending in ".." is reported with surrounding whitespace
    stripped.

    Returns:
        tuple: (bool, list) - True when nothing was found, and the offending
        sentences.
    """
    sentence_break = re.compile(r'(?<=[.!?]) +')
    incorrect_sentences = []
    for text in doc:
        pieces = sentence_break.split(text)
        incorrect_sentences.extend(piece.strip() for piece in pieces if piece.endswith('..'))
    return not incorrect_sentences, incorrect_sentences
 def spacing_check(doc):
+    """
+    Check for correct spacing in US federal regulatory documents.
+    Checks for:
+    - Spacing between document type and number (e.g., "AC 20-114")
+    - Spacing around section symbols (e.g., "§ 25.301")
+    - Spacing around part numbers (e.g., "Part 25")
+    - Spacing around paragraph indications (e.g., "(a)", "(1)")
+    - Double spaces between words
+    """
     incorrect_spacing = []
+    # Regex patterns to find incorrect spacing
+    doc_type_pattern = re.compile(r'(?<!\s)(AC|AD|CFR|FAA|N|SFAR)(\d+[-]?\d*)', re.IGNORECASE)
+    section_symbol_pattern = re.compile(r'(?<!\s)(§|§§)(\d+\.\d+)', re.IGNORECASE)
+    part_number_pattern = re.compile(r'(?<!\s)Part(\d+)', re.IGNORECASE)
+    paragraph_pattern = re.compile(r'(?<!\s)(\([a-z](?!\))|\([1-9](?!\)))', re.IGNORECASE)
+    double_space_pattern = re.compile(r'\s{2,}')
+    for paragraph in doc:
+        # Check for incorrect document type spacing
+        if doc_type_pattern.search(paragraph):
+            incorrect_spacing.append(paragraph)
+        # Check for incorrect section symbol spacing
+        if section_symbol_pattern.search(paragraph):
+            incorrect_spacing.append(paragraph)
+        # Check for incorrect part number spacing
+        if part_number_pattern.search(paragraph):
+            incorrect_spacing.append(paragraph)
+        # Check for incorrect paragraph indication spacing
+        if paragraph_pattern.search(paragraph):
+            incorrect_spacing.append(paragraph)
+        # Check for double spaces
+        if double_space_pattern.search(paragraph):
+            incorrect_spacing.append(paragraph)
     return len(incorrect_spacing) == 0, incorrect_spacing
def check_prohibited_phrases(doc):
    """Check for prohibited words or phrases.

    Args:
        doc (list): Paragraph texts of the document.

    Returns:
        list: (phrase, stripped paragraph) tuples, one per prohibited phrase
        found in each paragraph; matching is case-insensitive and on whole
        words.
    """
    # The readable label is stored next to its pattern. Deriving it with
    # pattern.strip(r'\b') strips the *characters* "\" and "b", which turned
    # "below" into "elow".
    prohibited_phrases = [
        ('above', re.compile(r'\babove\b', re.IGNORECASE)),
        ('below', re.compile(r'\bbelow\b', re.IGNORECASE)),
        ('there is', re.compile(r'\bthere is\b', re.IGNORECASE)),
        ('there are', re.compile(r'\bthere are\b', re.IGNORECASE)),
    ]
    issues = []
    for paragraph in doc:
        for label, pattern in prohibited_phrases:
            if pattern.search(paragraph):
                issues.append((label, paragraph.strip()))
    return issues
 def check_abbreviation_usage(doc):
+    """Check for abbreviation consistency after first definition."""
+    abbreviations = {}
+    issues = []
+    for paragraph in doc:
+        # Find definitions like "Federal Aviation Administration (FAA)"
+        defined_matches = re.findall(r'\b([A-Za-z &]+)\s+\((\b[A-Z]{2,}\b)\)', paragraph)
+        for full_term, acronym in defined_matches:
+            if acronym not in abbreviations:
+                abbreviations[acronym] = {"full_term": full_term.strip(), "defined": True}
+        # Check for full term usage after definition
+        for acronym, data in abbreviations.items():
+            full_term = data["full_term"]
+            if full_term in paragraph:
+                # Ignore first usage where it's defined
+                if data["defined"]:
+                    data["defined"] = False  # Mark it as now defined
+                else:
+                    # Only flag subsequent occurrences
+                    issues.append((full_term, acronym, paragraph.strip()))
+    return issues
 def check_date_formats(doc):
+    """Check for inconsistent date formats."""
     date_issues = []
+    correct_date_pattern = re.compile(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b')
+    date_pattern = re.compile(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b')  # MM/DD/YYYY
+    for paragraph in doc:
+        if date_pattern.search(paragraph):
+            dates = date_pattern.findall(paragraph)
+            for date in dates:
+                if not correct_date_pattern.match(date):
+                    date_issues.append((date, paragraph.strip()))
     return date_issues
 def check_placeholders(doc):
+    """Check for placeholders that should be removed."""
+    placeholder_phrases = [
+        r'\bTBD\b',
+        r'\bTo be determined\b',
+        r'\bTo be added\b'
+    ]
+    issues = []
+    for paragraph in doc:
+        for phrase in placeholder_phrases:
+            if re.search(phrase, paragraph, re.IGNORECASE):
+                issues.append((phrase.strip(r'\b'), paragraph.strip()))
+    return issues
 def process_file(file_obj, doc_type, template_type):
     """
     # Placeholder Check
     results.append("## Placeholder Check")
     if not kwargs['placeholder_issues']:
+        results.append("✅ No future references or placeholders found.\n")
     else:
         results.append("❌ Placeholders Found:")
         for phrase, paragraph in kwargs['placeholder_issues']:
 with demo:
     gr.Markdown("# Document Checker Tool")
     gr.Markdown("Upload a Word (docx) document to check for compliance with U.S. federal documentation standards.")
+    gr.Markdown("### This tool is still in development")
     gr.Markdown("Contact Eric Putnam if you have questions and comments.")
     document_types = [