Spaces:

BtB-ExpC
/

InsertElementsTags

Sleeping

App Files Files Community

BtB-ExpC committed on Apr 14, 2025

Commit

73a9e74

1 Parent(s): f414276

Normalized line endings (\r\n or bare \r, which may come from text pasted out of Word) to \n

Browse files

Files changed (1) hide show

app.py +23 -29

app.py CHANGED Viewed

@@ -2,9 +2,14 @@ import re
 import gradio as gr
 def insert_points(text):
     counter = 1
-    # This function will be called for each match by re.sub in the first pass
     def replacer(match):
         nonlocal counter
         tag = f"[INSERT_POINT_{counter:03d}]"
@@ -15,52 +20,41 @@ def insert_points(text):
     # --- Step 1: Insert tags before hash sequences and consolidated newline sequences ---
     # Pattern matches:
-    # 1) '\n+' : One or more consecutive newline characters. This handles basic blank lines.
-    #             To handle lines with only whitespace, a more complex pattern like
-    #             '(\s*\n)+\s*' might be needed, but let's stick to '\n+' based on the example.
-    #             This change ensures that \n\n or \n\n\n only trigger ONE tag.
     # 2) '\#+' : One or more consecutive '#' characters.
-    # This pattern might still create "[TAG1]\n[TAG2]###" if a newline immediately precedes a heading.
     pattern_initial = r'(\n+|\#+)'
     processed_text = re.sub(pattern_initial, replacer, text)
     # --- Step 2: Clean up potential heading splits ---
-    # This step addresses the case where Step 1 resulted in:
-    # [INSERT_POINT_XXX]<whitespace like \n>[INSERT_POINT_YYY]### Heading
-    # We want to remove the first tag and the intermediate whitespace, keeping the tag associated with the ###.
-    # The cleanup pattern finds:
-    #   (\[INSERT_POINT_\d{3}\]) : Capture Group 1: The tag before the newline/whitespace (e.g., TAG_XXX)
-    #   \s* : Any intermediate whitespace (importantly, including the newline)
-    #   (\[INSERT_POINT_\d{3}\]) : Capture Group 2: The tag right before the hashes (e.g., TAG_YYY)
-    #   (\#+)                    : Capture Group 3: The actual hash sequence (e.g., ###)
     cleanup_pattern = r'(\[INSERT_POINT_\d{3}\])\s*(\[INSERT_POINT_\d{3}\])(\#+)'
-    # The replacement uses:
-    #   \2 : Capture Group 2 (the tag we want to keep, TAG_YYY)
-    #   \3 : Capture Group 3 (the hash sequence)
-    # This effectively deletes the first tag (Group 1) and the intermediate whitespace.
     processed_text = re.sub(cleanup_pattern, r'\2\3', processed_text)
     return processed_text
-# --- Gradio Interface Code (Updated Description) ---
 demo = gr.Interface(
     fn=insert_points,
     inputs=gr.Textbox(
         lines=10,
-        placeholder="Paste your text here...",
         label="Your Input Text"
     ),
-    outputs=gr.Textbox(label="Processed Text with Tags"),
-    title="Insert Point Tagger",
     description=(
-        "Paste a block of text and get '[INSERT_POINT_###]' tags added:\n"
-        "1) **before** each sequence of one or more '#' characters (headings).\n"
-        "2) **before** each sequence of one or more newline characters (e.g., one tag for line breaks or blank lines)."
     ),
-    # You might add allow_flagging='never' if you don't need the flagging feature
-    # allow_flagging='never'
 )
 if __name__ == "__main__":

 import gradio as gr
 def insert_points(text):
+    # --- Step 0: Normalize line endings ---
+    # Replace Windows (\r\n) and old Mac (\r) line endings with Unix (\n)
+    # This simplifies the regex patterns below.
+    text = text.replace('\r\n', '\n').replace('\r', '\n')
     counter = 1
+    # Replacer function for the initial insertion pass
     def replacer(match):
         nonlocal counter
         tag = f"[INSERT_POINT_{counter:03d}]"
     # --- Step 1: Insert tags before hash sequences and consolidated newline sequences ---
     # Pattern matches:
+    # 1) '\n+' : One or more consecutive (now normalized) newline characters. Collapses blank lines.
     # 2) '\#+' : One or more consecutive '#' characters.
     pattern_initial = r'(\n+|\#+)'
     processed_text = re.sub(pattern_initial, replacer, text)
     # --- Step 2: Clean up potential heading splits ---
+    # This fixes cases where Step 1 resulted in "[TAG_A]\n[TAG_B]###"
+    # It looks for TAG_A, followed by whitespace (\s* includes the \n),
+    # followed by TAG_B, followed by hashes (#+).
     cleanup_pattern = r'(\[INSERT_POINT_\d{3}\])\s*(\[INSERT_POINT_\d{3}\])(\#+)'
+    # Replaces the whole match with just TAG_B (\2) and the hashes (\3).
     processed_text = re.sub(cleanup_pattern, r'\2\3', processed_text)
     return processed_text
+# --- Gradio Interface Code (with Copy Button) ---
 demo = gr.Interface(
     fn=insert_points,
     inputs=gr.Textbox(
         lines=10,
+        placeholder="Paste your text here...\n(Line endings will be normalized)",
         label="Your Input Text"
     ),
+    outputs=gr.Textbox(
+        label="Processed Text with Tags",
+        show_copy_button=True # <--- Added copy button here
+    ),
+    title="Insert Point Tagger v3", # Optional: update title
     description=(
+        "Paste text to add '[INSERT_POINT_###]' tags:\n"
+        "1) **Before** each '#' sequence (headings).\n"
+        "2) **Before** each sequence of one or more newlines (one tag per line break/blank line).\n"
+        "Normalizes line endings and cleans up heading tag spacing."
     ),
+    allow_flagging='never'
 )
 if __name__ == "__main__":