import re

import gradio as gr

# Regex for one complete tag, e.g. "[POSITION_007]". Tags are generated
# zero-padded to three digits but grow wider past 999, so the clean-up pass
# must accept three *or more* digits to treat every tag the same way.
_TAG = r'\[POSITION_\d{3,}]'


def insert_points(text: str) -> str:
    """Insert numbered ``[POSITION_NNN]`` tags into *text*.

    A tag is placed at the very beginning of the output, at every line break
    that remains after blank lines have been collapsed, and before every
    ``#`` heading (including headings found in the middle of running text,
    which are first moved onto their own line). Tags are numbered
    consecutively from 001 and each tag ends up on a line of its own.

    Example: the input ::

        Para one

        # Heading
        Para two

    becomes ::

        [POSITION_001]
        Para one
        [POSITION_002]
        # Heading
        [POSITION_003]
        Para two

    Args:
        text: The raw text to tag.

    Returns:
        The text with position tags inserted. An empty input yields just the
        first tag followed by a newline.
    """
    # Start the counter at 1
    counter = 1

    # -----------------------------
    # 0) NORMALIZE LINE ENDINGS AND REMOVE EXCESS WHITESPACE
    # -----------------------------
    text = text.strip()
    # Replace Windows-style line endings
    text = text.replace('\r\n', '\n')
    # Collapse multiple blank lines into one line break
    text = re.sub(r'\n\s*\n+', '\n', text)

    # -----------------------------
    # 1) FIRST PASS: INSERT NEWLINES BEFORE HEADINGS
    #    This separates headings that might be in running text
    # -----------------------------
    text = re.sub(r'(\s+)(#+\s+)', r'\1\n\2', text)
    # A heading at the very start has no preceding newline; add one so the
    # split below recognizes it as a heading segment.
    if text.startswith('#'):
        text = '\n' + text

    # -----------------------------
    # 2) PREPARE FOR PROCESSING
    # -----------------------------
    # The first tag is always 001 and comes at the beginning
    pieces = [f"[POSITION_{counter:03d}]\n"]
    counter += 1

    # Split into heading segments ("\n# ..."), bare newlines and plain text.
    # The capturing group keeps the separators in the result; empty strings
    # produced between adjacent separators are dropped.
    segments = re.split(r'(\n#+\s+.+?(?=\n|$)|\n)', text)
    segments = [s for s in segments if s]

    # Tracks whether the last thing emitted was a tag, so that two tags are
    # never emitted back to back.
    after_tag = True  # we just added the first tag

    # -----------------------------
    # 3) PROCESS SEGMENTS
    # -----------------------------
    for segment in segments:
        if segment == '\n':
            # A line break gets a tag, unless a tag was just emitted
            if after_tag:
                pieces.append('\n')
            else:
                pieces.append(f"\n[POSITION_{counter:03d}]\n")
                counter += 1
                after_tag = True
        elif segment.startswith('\n#'):
            # A heading gets a tag in front of it, unless one is already there
            if not after_tag:
                pieces.append(f"\n[POSITION_{counter:03d}]")
                counter += 1
            pieces.append(segment)
            after_tag = False
        else:
            # Regular text segment
            pieces.append(segment)
            after_tag = False

    # Join all segments back together
    text = ''.join(pieces)

    # -----------------------------
    # 4) CLEAN UP: Normalize spacing around tags
    # -----------------------------
    # Ensure exactly one newline before each tag
    text = re.sub(r'([^\n])\[POSITION_', r'\1\n[POSITION_', text)
    text = re.sub(r'\n+(\[POSITION_)', r'\n\1', text)

    # Ensure exactly one newline after each tag
    text = re.sub(rf'({_TAG})([^\n])', r'\1\n\2', text)
    text = re.sub(rf'({_TAG})\n+', r'\1\n', text)

    # Remove any newlines at the very beginning of the text
    text = re.sub(r'^\n+', '', text)

    # Ensure no consecutive tags (keep the first of any adjacent pair)
    text = re.sub(rf'({_TAG}\n)\s*{_TAG}', r'\1', text)

    return text


# -----------------------------
# GRADIO INTERFACE
# -----------------------------
demo = gr.Interface(
    fn=insert_points,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Paste your text here...",
        label="Your Input Text",
    ),
    outputs=gr.Textbox(
        label="Processed Text with Tags",
        show_copy_button=True,  # Enable copy button
    ),
    title="Insert Point Tagger",
    description=(
        "This processor inserts numbered tags between paragraphs and before #-headers"
    ),
)

if __name__ == "__main__":
    demo.launch()