# Spaces: BtB-ExpC / InsertElementsTags (Sleeping) - File size: 3,529 Bytes

import re
import gradio as gr


def insert_points(text):
    """Insert numbered ``[POSITION_NNN]`` tags between paragraphs and before headings.

    The input is normalized first: surrounding whitespace is stripped, CRLF
    and lone CR line endings become LF, and runs of blank lines are collapsed.
    After that every remaining line is treated as its own paragraph. A run of
    ``#`` followed by whitespace is treated as the start of a heading whenever
    it begins the text or is preceded by whitespace, even in the middle of a
    line; such headings are moved onto their own line.

    Args:
        text: The raw text to tag. ``None`` is treated like an empty string.

    Returns:
        The text with one tag line in front of every paragraph and heading.
        Tags are zero-padded to at least three digits and start at
        ``[POSITION_001]``. Empty input yields ``"[POSITION_001]\\n"``.
    """
    # A missing value is handled like empty input instead of failing on .strip().
    if text is None:
        text = ""

    # -----------------------------
    # 0) NORMALIZE LINE ENDINGS AND REMOVE EXCESS WHITESPACE
    # -----------------------------
    text = text.strip()
    # Convert Windows (CRLF) and lone CR line endings to LF so that every
    # line break is recognised by the newline-based logic below.
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    # Collapse multiple blank lines into one
    text = re.sub(r'\n\s*\n+', '\n', text)

    # -----------------------------
    # 1) FIRST PASS: INSERT NEWLINES BEFORE HEADINGS
    #    This separates headings that might be in running text
    # -----------------------------
    text = re.sub(r'(\s+)(#+\s+)', r'\1\n\2', text)

    # A heading at the very start has no preceding whitespace for the regex
    # above to match, so give it the leading newline the splitter expects.
    if text.startswith('#'):
        text = '\n' + text

    # -----------------------------
    # 2) PREPARE FOR PROCESSING
    # -----------------------------
    # First tag should be 001 and come at the beginning
    counter = 1
    result = [f"[POSITION_{counter:03d}]\n"]
    counter += 1

    # Split into heading segments ("\n# ..."), bare newlines and plain text.
    # The capturing group keeps the separators in the result list.
    segments = re.split(r'(\n#+\s+.+?(?=\n|$)|\n)', text)

    previous_was_tag = True  # Since we just added the first tag

    # -----------------------------
    # 3) PROCESS SEGMENTS
    # -----------------------------
    for segment in segments:
        # A newline starts a new paragraph: add a tag unless one was just added
        if segment == '\n':
            if previous_was_tag:
                result.append('\n')  # Just add the newline without a tag
            else:
                result.append(f"\n[POSITION_{counter:03d}]\n")
                counter += 1
                previous_was_tag = True

        # Empty or whitespace-only pieces (e.g. the indentation left in front
        # of an indented heading by step 1) carry no content. Treating them as
        # text would emit a tag that the clean-up pass later removes, leaving
        # a gap in the numbering.
        elif not segment.strip():
            continue

        # A heading gets a tag in front of it unless one was just added
        elif segment.startswith('\n#'):
            if not previous_was_tag:
                result.append(f"\n[POSITION_{counter:03d}]")
                counter += 1
            result.append(segment)
            previous_was_tag = False

        # Regular text segment
        else:
            result.append(segment)
            previous_was_tag = False

    # Join all segments back together
    text = ''.join(result)

    # -----------------------------
    # 4) CLEAN UP: Normalize spacing around tags
    #    Tag numbers are matched with \d{3,} because the zero-padded counter
    #    grows to four or more digits from tag 1000 onwards.
    # -----------------------------
    # Ensure exactly one newline before each tag
    text = re.sub(r'([^\n])\[POSITION_', r'\1\n[POSITION_', text)
    text = re.sub(r'\n+(\[POSITION_)', r'\n\1', text)

    # Ensure exactly one newline after each tag
    text = re.sub(r'(\[POSITION_\d{3,}])([^\n])', r'\1\n\2', text)
    text = re.sub(r'(\[POSITION_\d{3,}])\n+', r'\1\n', text)

    # Remove any newlines at the very beginning of the text
    text = re.sub(r'^\n+', '', text)

    # Ensure no consecutive tags
    text = re.sub(r'(\[POSITION_\d{3,}]\n)\s*\[POSITION_\d{3,}]', r'\1', text)

    return text


# -----------------------------
#  GRADIO INTERFACE
# -----------------------------
# Build the two text widgets up front so the Interface call stays compact.
_input_box = gr.Textbox(
    lines=10,
    placeholder="Paste your text here...",
    label="Your Input Text",
)
_output_box = gr.Textbox(
    label="Processed Text with Tags",
    show_copy_button=True,  # adds a copy button to the result box
)

# Single-function web UI: pasted text goes in, tagged text comes out.
demo = gr.Interface(
    fn=insert_points,
    inputs=_input_box,
    outputs=_output_box,
    title="Insert Point Tagger",
    description="This processor inserts numbered tags between paragraphs and before #-headers",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()