Spaces:
Sleeping
Sleeping
| import re | |
| import gradio as gr | |
def insert_points(text):
    """Insert numbered ``[POSITION_NNN]`` tags between paragraphs and before headings.

    The text is normalized (outer whitespace stripped, line endings unified,
    blank lines collapsed), ``#``-style headings found in running text are
    moved onto their own line, and a tag is placed on its own line before
    every paragraph and every heading. Numbering starts at 001 and is
    zero-padded to at least three digits.

    Args:
        text: The raw input string. ``None`` is treated as an empty string.

    Returns:
        The tagged text. The output always starts with ``[POSITION_001]``;
        for empty input it is just that tag followed by a newline.
    """
    # A missing value is handled like empty input instead of crashing on .strip().
    if text is None:
        text = ""

    # -----------------------------
    # 0) NORMALIZE LINE ENDINGS AND REMOVE EXCESS WHITESPACE
    # -----------------------------
    text = text.strip()
    # Unify Windows (\r\n) and lone carriage-return line endings to \n.
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    # Collapse blank (or whitespace-only) lines into a single line break.
    text = re.sub(r'\n\s*\n+', '\n', text)

    # -----------------------------
    # 1) INSERT NEWLINES BEFORE HEADINGS
    #    This separates headings that might be in running text.
    # -----------------------------
    text = re.sub(r'(\s+)(#+\s+)', r'\1\n\2', text)
    # A heading at the very start has no preceding whitespace to match above;
    # give it the leading newline the splitter below relies on.
    if text.startswith('#'):
        text = '\n' + text

    # -----------------------------
    # 2) SPLIT INTO SEGMENTS
    # -----------------------------
    # The capturing group keeps the delimiters, so the list alternates between
    # plain text, bare newlines, and "\n# heading" segments.
    segments = re.split(r'(\n#+\s+.+?(?=\n|$)|\n)', text)
    # Drop empty segments and whitespace-only fragments. The latter appear when
    # an indented heading is split off in step 1; keeping them would produce two
    # tags with no content in between, and the cleanup below would then delete
    # one of them, leaving a gap in the numbering.
    segments = [s for s in segments if s == '\n' or s.strip()]

    # -----------------------------
    # 3) PROCESS SEGMENTS
    # -----------------------------
    # The first tag is always 001 and opens the output.
    counter = 1
    result = [f"[POSITION_{counter:03d}]\n"]
    counter += 1
    previous_was_tag = True  # the opening tag was just emitted

    for segment in segments:
        if segment == '\n':
            # A line break starts a new paragraph: tag it, unless a tag was
            # just written (avoids back-to-back tags).
            if previous_was_tag:
                result.append('\n')
            else:
                result.append(f"\n[POSITION_{counter:03d}]\n")
                counter += 1
                previous_was_tag = True
        elif segment.startswith('\n#'):
            # A heading gets its own tag unless one immediately precedes it.
            if not previous_was_tag:
                result.append(f"\n[POSITION_{counter:03d}]")
                counter += 1
            result.append(segment)
            previous_was_tag = False
        else:
            # Regular text.
            result.append(segment)
            previous_was_tag = False

    text = ''.join(result)

    # -----------------------------
    # 4) CLEAN UP: normalize spacing around tags
    # -----------------------------
    # Tags are zero-padded to three digits but grow beyond that from 1000 on,
    # so the pattern must accept three *or more* digits.
    tag = r'\[POSITION_\d{3,}]'

    # Exactly one newline before each tag.
    text = re.sub(r'([^\n])\[POSITION_', r'\1\n[POSITION_', text)
    text = re.sub(r'\n+(\[POSITION_)', r'\n\1', text)
    # Exactly one newline after each tag.
    text = re.sub(rf'({tag})([^\n])', r'\1\n\2', text)
    text = re.sub(rf'({tag})\n+', r'\1\n', text)
    # No newlines at the very beginning of the text.
    text = re.sub(r'^\n+', '', text)
    # Safety net: never leave two tags with nothing but whitespace between them.
    text = re.sub(rf'({tag}\n)\s*{tag}', r'\1', text)
    return text
| # ----------------------------- | |
| # GRADIO INTERFACE | |
| # ----------------------------- | |
# Input widget: a ten-line textbox where the user pastes the raw text.
_source_box = gr.Textbox(
    label="Your Input Text",
    placeholder="Paste your text here...",
    lines=10,
)

# Output widget: shows the tagged text and offers a copy button.
_tagged_box = gr.Textbox(
    show_copy_button=True,
    label="Processed Text with Tags",
)

# Wire insert_points between the two textboxes.
demo = gr.Interface(
    title="Insert Point Tagger",
    description="This processor inserts numbered tags between paragraphs and before #-headers",
    fn=insert_points,
    inputs=_source_box,
    outputs=_tagged_box,
)

# Start the web UI only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()