Spaces:

BtB-ExpC
/

InsertElementsTags

Sleeping

App Files Community

BtB-ExpC committed on Apr 14, 2025

Commit

ed14152

1 Parent(s): 225d229

position

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +15 -20

README.md CHANGED Viewed

@@ -7,7 +7,7 @@ sdk: gradio
 sdk_version: 5.25.0
 app_file: app.py
 pinned: false
-short_description: Inserts [INSERT_POINT_###] tags in large texts
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 sdk_version: 5.25.0
 app_file: app.py
 pinned: false
+short_description: Inserts [POSITION_###] tags in large texts
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import gradio as gr
 def insert_points(text):
     counter = 1
     # -----------------------------
@@ -25,22 +26,26 @@ def insert_points(text):
         text = '\n' + text
     # -----------------------------
-    # 2) SPLIT TEXT INTO SEGMENTS AND TAG
-    #    Process the text in a single pass to avoid duplicate tags
     # -----------------------------
     # Split the text into segments (paragraphs and headings)
     segments = re.split(r'(\n#+\s+.+?(?=\n|$)|\n)', text)
     segments = [s for s in segments if s]  # Remove empty segments
-    result = []
-    previous_was_tag = False
     for segment in segments:
         # If this is a newline, add a tag (but not after another tag)
         if segment == '\n':
             if not previous_was_tag:
-                result.append(f"\n[INSERT_POINT_{counter:03d}]\n")
                 counter += 1
                 previous_was_tag = True
             else:
@@ -49,7 +54,7 @@ def insert_points(text):
         # If this is a heading, add a tag before it
         elif segment.startswith('\n#'):
             if not previous_was_tag:
-                result.append(f"\n[INSERT_POINT_{counter:03d}]")
                 counter += 1
                 previous_was_tag = True
             result.append(segment)
@@ -63,22 +68,15 @@ def insert_points(text):
     # Join all segments back together
     text = ''.join(result)
-    # -----------------------------
-    # 3) ADD TAG AT THE BEGINNING IF NEEDED
-    # -----------------------------
-    if not text.startswith('[INSERT_POINT_'):
-        text = f"[INSERT_POINT_{counter:03d}]\n" + text
-        counter += 1
     # -----------------------------
     # 4) CLEAN UP: Remove excess newlines
     # -----------------------------
     # Remove extra blank lines before tags
-    text = re.sub(r'\n\n+(\[INSERT_POINT_)', r'\n\1', text)
     # Remove extra blank lines at the beginning
     text = re.sub(r'^\n+', '', text)
     # Ensure no consecutive tags
-    text = re.sub(r'(\[INSERT_POINT_\d{3}]\n)\s*\[INSERT_POINT_\d{3}]', r'\1', text)
     return text
@@ -96,11 +94,8 @@ demo = gr.Interface(
     outputs=gr.Textbox(label="Processed Text with Tags"),
     title="Insert Point Tagger",
     description=(
-        "This processor:\n"
-        "1) Tags headings and paragraph breaks with sequential numbers.\n"
-        "2) Places each tag on its own line.\n"
-        "3) Ensures consistent, sequential numbering (001, 002, etc.).\n"
-        "4) Avoids consecutive tags - never two tags in a row."
     ),
 )

 def insert_points(text):
+    # Start the counter at 1
     counter = 1
     # -----------------------------
         text = '\n' + text
     # -----------------------------
+    # 2) PREPARE FOR PROCESSING
     # -----------------------------
+    # First tag should be 001 and come at the beginning
+    result = [f"[POSITION_{counter:03d}]\n"]
+    counter += 1
     # Split the text into segments (paragraphs and headings)
     segments = re.split(r'(\n#+\s+.+?(?=\n|$)|\n)', text)
     segments = [s for s in segments if s]  # Remove empty segments
+    previous_was_tag = True  # Since we just added the first tag
+    # -----------------------------
+    # 3) PROCESS SEGMENTS
+    # -----------------------------
     for segment in segments:
         # If this is a newline, add a tag (but not after another tag)
         if segment == '\n':
             if not previous_was_tag:
+                result.append(f"\n[POSITION_{counter:03d}]\n")
                 counter += 1
                 previous_was_tag = True
             else:
         # If this is a heading, add a tag before it
         elif segment.startswith('\n#'):
             if not previous_was_tag:
+                result.append(f"\n[POSITION_{counter:03d}]")
                 counter += 1
                 previous_was_tag = True
             result.append(segment)
     # Join all segments back together
     text = ''.join(result)
     # -----------------------------
     # 4) CLEAN UP: Remove excess newlines
     # -----------------------------
     # Remove extra blank lines before tags
+    text = re.sub(r'\n\n+(\[POSITION_)', r'\n\1', text)
     # Remove extra blank lines at the beginning
     text = re.sub(r'^\n+', '', text)
     # Ensure no consecutive tags
+    text = re.sub(r'(\[POSITION_\d{3}]\n)\s*\[POSITION_\d{3}]', r'\1', text)
     return text
     outputs=gr.Textbox(label="Processed Text with Tags"),
     title="Insert Point Tagger",
     description=(
+                "This processor inserts numbered tags between paragraphs and before #-headers"
     ),
 )